diff options
author | Adrian Thurston <thurston@colm.net> | 2019-09-08 21:14:08 -0600 |
---|---|---|
committer | Adrian Thurston <thurston@colm.net> | 2019-09-08 21:14:08 -0600 |
commit | 01b426837b1aa72d3535ec0f27b13e43993311bc (patch) | |
tree | 2dec36611a823f80ee677bd649b0f04ce5d885ee /ragel | |
parent | 1058d1029a9a1dda7d6d4c4e07fc775bb1ae4cd0 (diff) | |
download | colm-01b426837b1aa72d3535ec0f27b13e43993311bc.tar.gz |
moved /ragel-repos/src to /ragel
This will be what we build after building colm.
Diffstat (limited to 'ragel')
178 files changed, 57858 insertions, 0 deletions
diff --git a/ragel/.exrc b/ragel/.exrc new file mode 100644 index 00000000..412b360f --- /dev/null +++ b/ragel/.exrc @@ -0,0 +1,28 @@
" Directory-local Vim settings for the ragel sources.
" Leave vi-compatible mode so the remaining commands are parsed as Vim script.
if &cp | set nocp | endif
" Remember the current 'cpoptions' and switch to the Vim default while the
" mappings below are defined; the saved value is restored afterwards.
let s:cpo_save=&cpo
set cpo&vim
map <NL> j
" NOTE(review): the left-hand side of the next mapping looks like it was lost
" (probably a control character) -- confirm against the file in the repository.
map k
map Q gq
nmap gx <Plug>NetrwBrowseX
nnoremap <silent> <Plug>NetrwBrowseX :call netrw#NetrwBrowseX(expand("<cWORD>"),0)
let &cpo=s:cpo_save
unlet s:cpo_save
" Editing options.
set autoindent
set autowriteall
set backspace=2
set fileencodings=ucs-bom,utf-8,default,latin1
set helplang=en
set incsearch
set nojoinspaces
" :make runs a parallel build with four jobs.
set makeprg=make\ -j4
set printoptions=paper:letter
set ruler
" NOTE(review): this runtimepath hard-codes one machine's Vim install paths.
set runtimepath=~/.vim,/var/lib/vim/addons,/usr/share/vim/vimfiles,/usr/share/vim/vim74,/usr/share/vim/vimfiles/after,/var/lib/vim/addons/after,~/.vim/after
set showcmd
set showmatch
set suffixes=.bak,~,.swp,.o,.info,.aux,.log,.dvi,.bbl,.blg,.brf,.cb,.ind,.idx,.ilg,.inx,.out,.toc
set viminfo='20,\"50
set visualbell
set nowritebackup
" vim: set ft=vim :
diff --git a/ragel/.gitignore b/ragel/.gitignore new file mode 100644 index 00000000..b4f836c6 --- /dev/null +++ b/ragel/.gitignore @@ -0,0 +1,55 @@
# Paths ignored by git in the ragel directory. A leading '/' anchors the
# pattern to this directory.
/tags
/Makefile
/Makefile.in
/rlscan.cc
/rlparse.cc
/rlparse.h
/version.h
/config.h
/config.h.in
/config.h.in~
/ragel
/ragel.exe
/.deps
/stamp-h1
/rlhc
/rlhc.c

/*.lo

# Parsing
/parse.c
/rlreduce.cc
/ldparse.c
/ldreduce.cc

# Common testing file.
/tmp.rl
/tmp.c
/tmp.cc
/tmp.d
/tmp.go
/tmp.ps
/tmp.ml
/tmp.cmi
/tmp.cmx
/tmp.rs
/tmp.crk
/tmp.jl
/tmp
/input

# The ragel frontend doesn't support OCaml lexical rules yet, so a util is
# needed.
/util.ml
/util.cmi
/util.cmx

/libragel.a
/libragel.la
/libfsm.a
/libfsm.la
/.libs

/CMakeFiles
/cmake_install.cmake
diff --git a/ragel/CMakeLists.txt b/ragel/CMakeLists.txt new file mode 100644 index 00000000..f59a85d6 --- /dev/null +++ b/ragel/CMakeLists.txt @@ -0,0 +1,145 @@
# Build description for the ragel directory: the libfsm and libragel
# libraries, the ragel executable, and the per-host-language subdirectories.

# Check type size
include(CheckTypeSize)
check_type_size("int" SIZEOF_INT)
check_type_size("long" SIZEOF_LONG)

# Check system headers
include(CheckIncludeFile)
check_include_file(sys/wait.h HAVE_SYS_WAIT_H)

# Prepare settings
string(TOLOWER "${PROJECT_NAME}" _PACKAGE_NAME)
# NOTE(review): CMAKE_BUILD_TYPE is empty under multi-config generators, so
# DEBUG is never set there. DEBUG is presumably consumed by the configured
# headers below -- confirm against config.h.cmake.in.
if("${CMAKE_BUILD_TYPE}" MATCHES "[Dd][Ee][Bb]")
  set(DEBUG 1)
endif()
set(VERSION "${PROJECT_VERSION}")
set(PUBDATE "${PROJECT_PUBDATE}")

# Not referenced again in this file; presumably read by the host-* subdirectories.
set(common_COMPILE_DEFINITIONS PREFIX="${CMAKE_INSTALL_PREFIX}")

## Generate headers
# Inputs come from the source tree and outputs go to the binary tree; the
# paths are spelled out rather than relying on configure_file()'s defaults.
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/version.h.cmake.in"
  "${CMAKE_CURRENT_BINARY_DIR}/version.h" @ONLY)
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake.in"
  "${CMAKE_CURRENT_BINARY_DIR}/config.h" @ONLY)
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/ragel-config.cmake.in"
  "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config.cmake" @ONLY)

# Runtime headers
set(RUNTIME_HDR
  action.h fsmgraph.h ragel.h common.h
  gendata.h redfsm.h dot.h)

# Other CMake modules
include(GNUInstallDirs)

# libfsm

add_library(libfsm
  buffer.h codegen.h
  actloop.h actexp.h
  tables.h
  binary.h bingoto.h binbreak.h binvar.h
  flat.h flatgoto.h flatbreak.h flatvar.h
  switch.h switchgoto.h switchbreak.h switchvar.h
  goto.h gotoloop.h gotoexp.h
  ipgoto.h asm.h
  idbase.cc fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc fsmgraph.cc
  fsmap.cc fsmcond.cc fsmnfa.cc common.cc redfsm.cc gendata.cc
  allocgen.cc codegen.cc
  actexp.cc binvar.cc
  tables.cc tabgoto.cc tabbreak.cc tabvar.cc
  binary.cc bingoto.cc binbreak.cc actloop.cc
  flat.cc flatgoto.cc flatbreak.cc flatvar.cc
  switch.cc switchgoto.cc switchbreak.cc switchvar.cc
  goto.cc gotoloop.cc gotoexp.cc ipgoto.cc
  dot.cc asm.cc)

target_include_directories(libfsm
  PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/../aapl>
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)

# The target is called libfsm, but the produced file is (lib)fsm.
set_target_properties(libfsm PROPERTIES
  OUTPUT_NAME fsm)

# libragel

add_library(libragel
  # dist
  parsedata.h parsetree.h inputdata.h pcheck.h reducer.h rlscan.h load.h
  parsetree.cc longest.cc parsedata.cc inputdata.cc load.cc reducer.cc)

# libfsm is linked here to mirror libragel_la_LIBADD in Makefile.am. PUBLIC
# keeps the transitive behaviour of the former keyword-less signature.
target_link_libraries(libragel
  PUBLIC
    libfsm
    colm::libcolm)

target_include_directories(libragel
  PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/../aapl>
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)

set_target_properties(libragel PROPERTIES
  OUTPUT_NAME ragel)

# CMAKE_INSTALL_FULL_BINDIR (from GNUInstallDirs) is always absolute, so an
# absolute CMAKE_INSTALL_BINDIR no longer yields "<prefix>//abs/bin".
target_compile_definitions(libragel
  PRIVATE
    "BINDIR=\"${CMAKE_INSTALL_FULL_BINDIR}\"")

# ragel program

set(RAGEL_LM
  rlparse.lm
  ragel.lm
  rlreduce.lm)

# Run colm over the grammar to produce the parser (parse.c) and the reducer
# (rlreduce.cc) in the binary directory. colm is run from the source directory,
# where rlparse.lm lives.
add_custom_command(
  OUTPUT
    "${CMAKE_CURRENT_BINARY_DIR}/parse.c"
    "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc"
  COMMAND colm::colm
    -c -b rlparseC
    -o "${CMAKE_CURRENT_BINARY_DIR}/parse.c"
    -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc"
    rlparse.lm
  DEPENDS ${RAGEL_LM}
  WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}"
  VERBATIM)

add_executable(ragel
  main.cc
  "${CMAKE_CURRENT_BINARY_DIR}/parse.c"
  "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc")

target_link_libraries(ragel
  PRIVATE
    libragel
    libfsm)

foreach(_SUBDIR host-ruby host-asm host-julia host-ocaml host-c host-d
    host-csharp host-go host-java host-rust host-crack host-js)
  add_subdirectory("${_SUBDIR}")
endforeach()

# Installation and package export, enabled by <PROJECT_NAME>_MAKE_INSTALL.
if(${PROJECT_NAME}_MAKE_INSTALL)
  if(NOT DEFINED CMAKE_INSTALL_CMAKEDIR)
    set(CMAKE_INSTALL_CMAKEDIR
      "${CMAKE_INSTALL_LIBDIR}/cmake/${_PACKAGE_NAME}"
      CACHE STRING "CMake packages")
  endif()
  install(FILES ${RUNTIME_HDR}
    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/ragel")
  install(TARGETS libfsm libragel ragel
    EXPORT ${_PACKAGE_NAME}-targets
    RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
  install(EXPORT ${_PACKAGE_NAME}-targets
    NAMESPACE ${_PACKAGE_NAME}::
    DESTINATION "${CMAKE_INSTALL_CMAKEDIR}")
  export(EXPORT ${_PACKAGE_NAME}-targets
    NAMESPACE ${_PACKAGE_NAME}::
    FILE "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-targets.cmake")
  include(CMakePackageConfigHelpers)
  write_basic_package_version_file(
    "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config-version.cmake"
    VERSION ${${PROJECT_NAME}_VERSION}
    COMPATIBILITY AnyNewerVersion)
  install(FILES
    "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config.cmake"
    "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config-version.cmake"
    DESTINATION "${CMAKE_INSTALL_CMAKEDIR}")
endif()
diff --git a/ragel/Makefile.am b/ragel/Makefile.am new file mode 100644 index 00000000..dd41e56f --- /dev/null +++ b/ragel/Makefile.am @@ -0,0 +1,157 @@
SUBDIRS = . host-ruby host-asm host-julia host-ocaml host-c \
	host-d host-csharp host-go host-java host-rust host-crack host-js

COLM = @COLM@
COLM_LD = @COLM_LD@
COLM_LIBDEP = @COLM_LIBDEP@
COLM_BINDEP = @COLM_BINDEP@
KELBT = @KELBT@
RAGEL = @RAGEL@

# libfsm contains only the FSM construction code and the backend code
# generators. It is useful for building code generators in programs not
# connected to the ragel language.
#
# libragel contains the parse tree and other parsing support code. Everything
# except the reducers, which are specific to the frontends.
lib_LTLIBRARIES = libfsm.la libragel.la

if BUILD_PROGRAM

bin_PROGRAMS = ragel

endif

pkginclude_HEADERS = \
	action.h fsmgraph.h ragel.h common.h \
	gendata.h redfsm.h dot.h

nodist_pkginclude_HEADERS = config.h

data_DATA = ragel.lm

#
# libfsm: state machine construction and direct code generation.
#
libfsm_la_CPPFLAGS = -I$(top_srcdir)/aapl

dist_libfsm_la_SOURCES = \
	buffer.h codegen.h \
	actloop.h actexp.h \
	tables.h \
	binary.h bingoto.h binbreak.h binvar.h \
	flat.h flatgoto.h flatbreak.h flatvar.h \
	switch.h switchgoto.h switchbreak.h switchvar.h \
	goto.h gotoloop.h gotoexp.h \
	ipgoto.h asm.h \
	idbase.cc fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc fsmgraph.cc \
	fsmap.cc fsmcond.cc fsmnfa.cc common.cc redfsm.cc gendata.cc \
	allocgen.cc codegen.cc \
	actexp.cc binvar.cc \
	tables.cc tabgoto.cc tabbreak.cc tabvar.cc \
	binary.cc bingoto.cc binbreak.cc actloop.cc \
	flat.cc flatgoto.cc flatbreak.cc flatvar.cc \
	switch.cc switchgoto.cc switchbreak.cc switchvar.cc \
	goto.cc gotoloop.cc gotoexp.cc ipgoto.cc \
	dot.cc asm.cc

nodist_libfsm_la_SOURCES = \
	version.h

libfsm_la_LDFLAGS = -no-undefined

#
# libragel: ragel program minus host-specific code
#
libragel_la_CPPFLAGS = -I$(top_srcdir)/aapl -DBINDIR='"@bindir@"'

dist_libragel_la_SOURCES = \
	parsedata.h parsetree.h inputdata.h pcheck.h reducer.h rlscan.h load.h \
	parsetree.cc longest.cc parsedata.cc inputdata.cc load.cc reducer.cc

libragel_la_LDFLAGS = -no-undefined
libragel_la_LIBADD = libfsm.la $(COLM_LD)

if LINKER_NO_UNDEFINED
libfsm_la_LDFLAGS += -Wl,--no-undefined
libragel_la_LDFLAGS += -Wl,--no-undefined
endif

#
# ragel program.
#
ragel_CPPFLAGS = -I$(top_srcdir)/aapl

dist_ragel_SOURCES = \
	main.cc

nodist_ragel_SOURCES = \
	parse.c rlreduce.cc

ragel_LDADD = libragel.la libfsm.la $(COLM_LD)
ragel_DEPENDENCIES = libragel.la libfsm.la $(COLM_LIBDEP)

# Files that must exist before any object is compiled.
BUILT_SOURCES = \
	version.h \
	parse.c rlreduce.cc \
	include/ragel

# include/ragel is a symlink back to this directory.
include/ragel:
	mkdir -p include
	ln -s .. include/ragel

if WITH_RAGEL_KELBT
nodist_ragel_SOURCES += \
	rlscan.cc rlparse.cc rlparse.h

BUILT_SOURCES += \
	rlscan.cc rlparse.cc rlparse.h
endif

version.h: Makefile
	echo '#define VERSION "$(PACKAGE_VERSION)"' > version.h
	echo '#define PUBDATE "$(PUBDATE)"' >> version.h

EXTRA_DIST = \
	$(RAGEL_LM) \
	rlscan.rl \
	rlparse.kh \
	rlparse.kl \
	ril.lm \
	rlhc-main.lm

# rlreduce.cc and version.h are generated by the rules in this file, so they
# are removed by "make clean" together with parse.c.
CLEANFILES = parse.c rlreduce.cc version.h commit.cc rlhc.c

if WITH_RAGEL_KELBT
CLEANFILES += rlscan.cc rlparse.cc rlparse.h
endif

RAGEL_LM = \
	rlparse.lm \
	ragel.lm \
	rlreduce.lm

# One colm run writes both parse.c (-o) and rlreduce.cc (-m); the empty
# rlreduce.cc rule below ties the second output to the first.
parse.c: $(RAGEL_LM) $(COLM_BINDEP)
	$(COLM) -c -b rlparseC -o $@ -m rlreduce.cc $<

rlreduce.cc: parse.c


if WITH_RAGEL_KELBT

# This dependency comes from the import of the parser defines into the scanner.
rlscan.cc: rlparse.h
inputdata.cc: rlparse.h

ragel-rlscan.$(OBJEXT): rlscan.cc

rlscan.cc: rlscan.rl
	$(RAGEL) -G2 -I$(builddir) -o $@ $<

rlparse.h: rlparse.kh
	$(KELBT) -o $@ $<

rlparse.cc: rlparse.kl rlparse.kh
	$(KELBT) -o $@ $<

endif
diff --git a/ragel/actexp.cc b/ragel/actexp.cc new file mode 100644 index 00000000..771d4623 --- /dev/null +++ b/ragel/actexp.cc @@ -0,0 +1,218 @@
/*
 * Copyright 2018-2018 Adrian Thurston <thurston@colm.net>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "actexp.h" +#include "redfsm.h" +#include "gendata.h" + +void ActExp::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->actListId + 1; + fromStateActions.value( act ); +} + +void ActExp::COND_ACTION( RedCondPair *cond ) +{ + int action = 0; + if ( cond->action != 0 ) + action = cond->action->actListId + 1; + condActions.value( action ); +} + +void ActExp::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->actListId + 1; + toStateActions.value( act ); +} + +void ActExp::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->actListId + 1; + eofActions.value( act ); +} + +void ActExp::NFA_PUSH_ACTION( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->push != 0 ) + act = targ->push->actListId+1; + nfaPushActions.value( act ); +} + +void ActExp::NFA_POP_TEST( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->popTest != 0 ) + act = targ->popTest->actListId+1; + nfaPopTrans.value( act ); +} + + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &ActExp::FROM_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numFromStateRefs > 0 ) { + /* Write the entry label. 
*/ + out << "\t " << CASE( STR( redAct->actListId+1 ) ) << " {\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( 0, false, false ) ); + out << "\n\t"; + } + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &ActExp::ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numTransRefs > 0 ) { + /* Write the entry label. */ + out << "\t " << CASE( STR( redAct->actListId+1 ) ) << " {\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( 0, false, false ) ); + out << "\n\t"; + } + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &ActExp::TO_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numToStateRefs > 0 ) { + /* Write the entry label. */ + out << "\t " << CASE( STR( redAct->actListId+1 ) ) << " {\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( 0, false, false ) ); + out << "\n\t"; + } + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &ActExp::EOF_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numEofRefs > 0 ) { + /* Write the entry label. 
*/ + out << "\t " << CASE( STR( redAct->actListId+1 ) ) << " {\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( 0, true, false ) ); + out << "\n\t"; + } + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + + +void ActExp::FROM_STATE_ACTIONS() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " switch ( " << ARR_REF( fromStateActions ) << "[" << vCS() << "] ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + "\n"; + } +} + +void ActExp::REG_ACTIONS( std::string cond ) +{ + out << + " switch ( " << ARR_REF( condActions ) << "[" << cond << "] ) {\n"; + ACTION_SWITCH() << + " }\n" + "\n"; +} +void ActExp::TO_STATE_ACTIONS() +{ + if ( redFsm->anyToStateActions() ) { + out << + " switch ( " << ARR_REF( toStateActions ) << "[" << vCS() << "] ) {\n"; + TO_STATE_ACTION_SWITCH() << + " }\n" + "\n"; + } +} + + +void ActExp::EOF_ACTIONS() +{ + if ( redFsm->anyEofActions() ) { + out << + " switch ( " << ARR_REF( eofActions ) << "[" << vCS() << "] ) {\n"; + EOF_ACTION_SWITCH() << + " }\n"; + } +} + +void ActExp::NFA_FROM_STATE_ACTION_EXEC() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " switch ( " << ARR_REF( fromStateActions ) << "[nfa_bp[nfa_len].state] ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + "\n"; + } +} + diff --git a/ragel/actexp.h b/ragel/actexp.h new file mode 100644 index 00000000..49165755 --- /dev/null +++ b/ragel/actexp.h @@ -0,0 +1,62 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to 
do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _BINEXP_H +#define _BINEXP_H + +#include "binary.h" + +struct RedStateAp; +struct RedCondPair; + +class ActExp + : public virtual Tables +{ +public: + ActExp( const CodeGenArgs &args ) + : + Tables( args ) + {} + + virtual void FROM_STATE_ACTION( RedStateAp *state ); + virtual void COND_ACTION( RedCondPair *cond ); + virtual void TO_STATE_ACTION( RedStateAp *state ); + virtual void EOF_ACTION( RedStateAp *state ); + + virtual void NFA_PUSH_ACTION( RedNfaTarg *targ ); + virtual void NFA_POP_TEST( RedNfaTarg *targ ); + + virtual std::ostream &FROM_STATE_ACTION_SWITCH(); + virtual std::ostream &ACTION_SWITCH(); + virtual std::ostream &TO_STATE_ACTION_SWITCH(); + virtual std::ostream &EOF_ACTION_SWITCH(); + + virtual void TO_STATE_ACTIONS(); + virtual void REG_ACTIONS( std::string cond ); + virtual void FROM_STATE_ACTIONS(); + virtual void EOF_ACTIONS(); + + virtual void NFA_FROM_STATE_ACTION_EXEC(); +}; + +#endif + diff --git a/ragel/action.h b/ragel/action.h new file mode 100644 index 00000000..39169202 --- /dev/null +++ b/ragel/action.h @@ -0,0 +1,116 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in 
the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _ACTION_H +#define _ACTION_H + +#include "fsmgraph.h" + +struct NameInst; +struct NameRef; +struct LongestMatch; +struct InlineList; + +/* + * Inline code tree + */ +struct InlineItem +{ + enum Type + { + Text, Goto, Call, Ncall, Next, GotoExpr, CallExpr, NcallExpr, NextExpr, Ret, Nret, + PChar, Char, Hold, Curs, Targs, Entry, Exec, Break, Nbreak, + LmSwitch, LmSetActId, LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, + LmInitAct, LmInitTokStart, LmSetTokStart, LmNfaOnNext, LmNfaOnLast, LmNfaOnEof, Stmt, Subst, + NfaWrapAction, NfaWrapConds + }; + + InlineItem( const InputLoc &loc, std::string data, Type type ) : + loc(loc), data(data), nameRef(0), children(0), type(type) { } + + InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) : + loc(loc), nameRef(nameRef), children(0), type(type) { } + + InlineItem( const InputLoc &loc, LongestMatch *longestMatch, + LongestMatchPart *longestMatchPart, Type type ) : loc(loc), + nameRef(0), children(0), longestMatch(longestMatch), + longestMatchPart(longestMatchPart), type(type) { } + + InlineItem( 
const InputLoc &loc, NameInst *nameTarg, Type type ) : + loc(loc), nameRef(0), nameTarg(nameTarg), children(0), + type(type) { } + + InlineItem( const InputLoc &loc, Type type ) : + loc(loc), nameRef(0), children(0), type(type) { } + + InlineItem( const InputLoc &loc, Action *wrappedAction, Type type ) + : + loc(loc), nameRef(0), children(0), longestMatch(0), + longestMatchPart(0), wrappedAction(wrappedAction), type(type) + {} + + InlineItem( const InputLoc &loc, CondSpace *condSpace, + const CondKeySet &condKeySet, Type type ) + : + loc(loc), nameRef(0), children(0), longestMatch(0), + longestMatchPart(0), wrappedAction(0), condSpace(condSpace), + condKeySet(condKeySet), type(type) + {} + + ~InlineItem(); + + InputLoc loc; + std::string data; + NameRef *nameRef; + NameInst *nameTarg; + InlineList *children; + LongestMatch *longestMatch; + LongestMatchPart *longestMatchPart; + long substPos; + Action *wrappedAction; + CondSpace *condSpace; + CondKeySet condKeySet; + Type type; + + InlineItem *prev, *next; +}; + +/* Normally this would be atypedef, but that would entail including DList from + * ptreetypes, which should be just typedef forwards. 
*/ +struct InlineList : public DList<InlineItem> { }; + +struct InlineBlock +{ + InlineBlock( const InputLoc &loc, InlineList *inlineList ) + : loc(loc), inlineList(inlineList) {} + + ~InlineBlock() + { + inlineList->empty(); + delete inlineList; + } + + InputLoc loc; + InlineList *inlineList; +}; + +#endif diff --git a/ragel/actloop.cc b/ragel/actloop.cc new file mode 100644 index 00000000..3d16b9ea --- /dev/null +++ b/ragel/actloop.cc @@ -0,0 +1,229 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "actloop.h" +#include "redfsm.h" +#include "gendata.h" + +void ActLoop::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + fromStateActions.value( act ); +} + +void ActLoop::COND_ACTION( RedCondPair *cond ) +{ + int act = 0; + if ( cond->action != 0 ) + act = cond->action->location+1; + condActions.value( act ); +} + +void ActLoop::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + toStateActions.value( act ); +} + +void ActLoop::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->location+1; + eofActions.value( act ); +} + +void ActLoop::NFA_PUSH_ACTION( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->push != 0 ) + act = targ->push->actListId+1; + nfaPushActions.value( act ); +} + +void ActLoop::NFA_POP_TEST( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->popTest != 0 ) + act = targ->popTest->actListId+1; + nfaPopTrans.value( act ); +} + +std::ostream &ActLoop::FROM_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numFromStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t " << CASE( STR( act->actionId ) ) << " {\n"; + ACTION( out, act, IlOpts( 0, false, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &ActLoop::ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numTransRefs > 0 ) { + /* Write the case label, the action and the case break. 
*/ + out << "\t " << CASE( STR( act->actionId ) ) << " {\n"; + ACTION( out, act, IlOpts( 0, false, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &ActLoop::TO_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numToStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t " << CASE( STR( act->actionId ) ) << " {\n"; + ACTION( out, act, IlOpts( 0, false, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + + +std::ostream &ActLoop::EOF_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numEofRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t " << CASE( STR( act->actionId ) ) << " {\n"; + ACTION( out, act, IlOpts( 0, true, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + + + +void ActLoop::FROM_STATE_ACTIONS() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), ARR_REF( fromStateActions ) + "[" + vCS() + "]" ) << ";\n" + " " << nacts << " = " << CAST(UINT()) << DEREF( ARR_REF( actions ), "" + string(acts) + "" ) << ";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "" + string(acts) + "" ) << " ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + " " << nacts << " -= 1;\n" + " " << acts << " += 1;\n" + " }\n" + "\n"; + } +} + +void ActLoop::REG_ACTIONS( std::string cond ) +{ + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), ARR_REF( condActions ) + "[" + cond + "]" ) << ";\n" + " " << nacts << " = " << CAST( UINT() ) << DEREF( ARR_REF( actions ), "" + string(acts) + "" ) << 
";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "" + string(acts) + "" ) << " )\n" + " {\n"; + ACTION_SWITCH() << + " }\n" + " " << nacts << " -= 1;\n" + " " << acts << " += 1;\n" + " }\n" + "\n"; +} + +void ActLoop::TO_STATE_ACTIONS() +{ + if ( redFsm->anyToStateActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), ARR_REF( toStateActions ) + "[" + vCS() + "]" ) << ";\n" + " " << nacts << " = " << CAST(UINT()) << DEREF( ARR_REF( actions ), string(acts) ) << ";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), string(acts) ) << " ) {\n"; + TO_STATE_ACTION_SWITCH() << + " }\n" + " " << nacts << " -= 1;\n" + " " << acts << " += 1;\n" + " }\n" + "\n"; + } +} + +void ActLoop::EOF_ACTIONS() +{ + if ( redFsm->anyEofActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), ARR_REF( eofActions ) + "[" + vCS() + "]" ) << ";\n" + " " << nacts << " = " << CAST(UINT()) << DEREF( ARR_REF( actions ), string(acts) ) << ";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), string(acts) ) << " ) {\n"; + EOF_ACTION_SWITCH() << + " }\n" + " " << nacts << " -= 1;\n" + " " << acts << " += 1;\n" + " }\n"; + } +} + +void ActLoop::NFA_FROM_STATE_ACTION_EXEC() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), ARR_REF( fromStateActions ) + "[nfa_bp[nfa_len].state]" ) << ";\n" + " " << nacts << " = " << CAST( UINT() ) << DEREF( ARR_REF( actions ), string(acts) ) << ";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), string(acts) ) << " ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + " " << nacts << " -= 1;\n" + " " << acts << " += 1;\n" + " }\n" + "\n"; + } +} + diff --git a/ragel/actloop.h b/ragel/actloop.h new file mode 100644 
index 00000000..238ba72a --- /dev/null +++ b/ragel/actloop.h @@ -0,0 +1,63 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _BINLOOP_H +#define _BINLOOP_H + +#include "binary.h" +#include "tables.h" + +struct RedStateAp; +struct RedCondPair; + +class ActLoop + : public virtual Tables +{ +public: + ActLoop( const CodeGenArgs &args ) + : + Tables( args ) + {} + + virtual void FROM_STATE_ACTION( RedStateAp *state ); + virtual void COND_ACTION( RedCondPair *cond ); + virtual void TO_STATE_ACTION( RedStateAp *state ); + virtual void EOF_ACTION( RedStateAp *state ); + + virtual void NFA_PUSH_ACTION( RedNfaTarg *targ ); + virtual void NFA_POP_TEST( RedNfaTarg *targ ); + + virtual std::ostream &FROM_STATE_ACTION_SWITCH(); + virtual std::ostream &ACTION_SWITCH(); + virtual std::ostream &TO_STATE_ACTION_SWITCH(); + virtual std::ostream &EOF_ACTION_SWITCH(); + + virtual void FROM_STATE_ACTIONS(); + virtual void REG_ACTIONS( std::string cond ); + virtual void TO_STATE_ACTIONS(); + virtual void EOF_ACTIONS(); + + virtual void NFA_FROM_STATE_ACTION_EXEC(); +}; + + +#endif diff --git a/ragel/allocgen.cc b/ragel/allocgen.cc new file mode 100644 index 00000000..fee37940 --- /dev/null +++ b/ragel/allocgen.cc @@ -0,0 +1,138 @@ +/* + * Copyright 2005-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "parsedata.h" +#include "fsmgraph.h" +#include "gendata.h" +#include "inputdata.h" +#include "version.h" + +/* + * Code generators. + */ +#include "bingoto.h" +#include "binbreak.h" +#include "binvar.h" +#include "flatgoto.h" +#include "flatbreak.h" +#include "flatvar.h" +#include "switchgoto.h" +#include "switchbreak.h" +#include "switchvar.h" +#include "gotoloop.h" +#include "gotoexp.h" +#include "ipgoto.h" +#include "asm.h" + +CodeGenData *makeCodeGenAsm( const HostLang *hostLang, const CodeGenArgs &args ) +{ /* factory for the assembly backend; hostLang is not consulted */ + return new AsmCodeGen( args ); /* heap-allocated; this function never deletes it */ +} + +/* Invoked by the parser when a ragel definition is opened. 
*/ +CodeGenData *makeCodeGen( const HostLang *hostLang, const CodeGenArgs &args ) +{ /* picks the generator class from args.codeStyle and the backend feature */ + FsmGbl *id = args.id; + CodeGenData *codeGen = 0; /* stays null when no generator is allocated below */ + BackendFeature feature = hostLang->feature; + if ( args.forceVar ) /* forceVar overrides the host language's own feature */ + feature = VarFeature; + + switch ( args.codeStyle ) { /* no default case: an unmatched style leaves codeGen null */ + case GenBinaryLoop: + if ( feature == GotoFeature ) + codeGen = new BinGotoLoop( args ); + else if ( feature == BreakFeature ) + codeGen = new BinBreakLoop( args ); + else /* any other feature value gets the Var variant */ + codeGen = new BinVarLoop( args ); + break; + + case GenBinaryExp: + if ( feature == GotoFeature ) + codeGen = new BinGotoExp( args ); + else if ( feature == BreakFeature ) + codeGen = new BinBreakExp( args ); + else + codeGen = new BinVarExp( args ); + break; + + case GenFlatLoop: + if ( feature == GotoFeature ) + codeGen = new FlatGotoLoop( args ); + else if ( feature == BreakFeature ) + codeGen = new FlatBreakLoop( args ); + else + codeGen = new FlatVarLoop( args ); + break; + + case GenFlatExp: + if ( feature == GotoFeature ) + codeGen = new FlatGotoExp( args ); + else if ( feature == BreakFeature ) + codeGen = new FlatBreakExp( args ); + else + codeGen = new FlatVarExp( args ); + break; + case GenSwitchLoop: + if ( feature == GotoFeature ) + codeGen = new SwitchGotoLoop( args ); + else if ( feature == BreakFeature ) + codeGen = new SwitchBreakLoop( args ); + else + codeGen = new SwitchVarLoop( args ); + break; + + case GenSwitchExp: + if ( feature == GotoFeature ) + codeGen = new SwitchGotoExp( args ); + else if ( feature == BreakFeature ) + codeGen = new SwitchBreakExp( args ); + else + codeGen = new SwitchVarExp( args ); + break; + + + case GenGotoLoop: /* the remaining styles are only built for GotoFeature */ + if ( feature == GotoFeature ) + codeGen = new GotoLoop(args); + else + id->error() << "unsupported lang/style combination" << endp; + break; + case GenGotoExp: + if ( feature == GotoFeature ) + codeGen = new GotoExp(args); + else + id->error() << "unsupported lang/style combination" << endp; + break; + + case GenIpGoto: + if ( feature == GotoFeature ) + codeGen = new 
IpGoto(args); + else + id->error() << "unsupported lang/style combination" << endp; + break; + } + + return codeGen; /* null if an unsupported combination was reported or no case matched */ +} diff --git a/ragel/asm.cc b/ragel/asm.cc new file mode 100644 index 00000000..073f64dc --- /dev/null +++ b/ragel/asm.cc @@ -0,0 +1,2047 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ragel.h" +#include "asm.h" +#include "redfsm.h" +#include "gendata.h" +#include "bstmap.h" +#include "ragel.h" +#include "redfsm.h" +#include "bstmap.h" +#include "gendata.h" +#include "parsedata.h" +#include "inputdata.h" +#include <sstream> + +using std::ostream; +using std::ostringstream; +using std::string; +using std::endl; +using std::istream; +using std::ifstream; +using std::ostream; +using std::ios; +using std::cin; +using std::endl; + +extern int numSplitPartitions; +bool printStatistics = false; + +/* Enables transition logging in the form that score-based state sorting can + * process. This bit of code is intended to increase locality and reduce + * cache misses. Gains are minimal, 1-2%. */ +// #define LOG_TRANS 1 + +void asmLineDirective( ostream &out, const char *fileName, int line ) +{ + /* Write the preprocessor line info for the input file. Only backslashes in the name are escaped. */ + out << "#line " << line << " \""; + for ( const char *pc = fileName; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } + out << '"'; + + out << '\n'; +} + +/* Init code gen with in parameters. */ +AsmCodeGen::AsmCodeGen( const CodeGenArgs &args ) +: + CodeGenData( args ), + nextLmSwitchLabel( 1 ), + stackCS( false ) +{ +} + +void AsmCodeGen::genAnalysis() +{ + /* For directly executable machines there is no required state + * ordering. Choose a depth-first ordering to increase the + * potential for fall-throughs. */ + redFsm->depthFirstOrdering(); + + /* Choose default transitions and make the flat transitions by character class. */ + redFsm->chooseDefaultSpan(); + redFsm->makeFlatClass(); + + /* If any errors have occurred in the input file then don't write anything. */ + if ( red->id->errorCount > 0 ) + return; + + redFsm->setInTrans(); + + /* Analyze Machine will find the final action reference counts, among other + * things. We will use these in reporting the usage of fsm directives in + * action code. 
*/ + red->analyzeMachine(); +} + +/* Write out the fsm name. */ +string AsmCodeGen::FSM_NAME() +{ + return fsmName; +} + +/* Emit the offset of the start state as a decimal integer. */ +string AsmCodeGen::START_STATE_ID() +{ + ostringstream ret; + ret << redFsm->startState->id; /* dereferences startState without a null check */ + return ret.str(); +}; + +string AsmCodeGen::ACCESS() /* user access expression, or an empty string when none is set */ +{ + ostringstream ret; + if ( red->accessExpr != 0 ) + INLINE_LIST( ret, red->accessExpr, 0, false, false ); + return ret.str(); +} + + +string AsmCodeGen::P() /* user p expression, else the default operand %r12 */ +{ + ostringstream ret; + if ( red->pExpr == 0 ) + ret << "%r12"; + else { + INLINE_LIST( ret, red->pExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::PE() /* user pe expression, else the default operand %r13 */ +{ + ostringstream ret; + if ( red->peExpr == 0 ) + ret << "%r13"; + else { + INLINE_LIST( ret, red->peExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::vCS() /* user cs expression, else -48(%rbp) when stackCS is set, else %r11 */ +{ + ostringstream ret; + if ( red->csExpr == 0 ) { + if ( stackCS ) + ret << "-48(%rbp)"; + else + ret << "%r11"; + } + else { + INLINE_LIST( ret, red->csExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::TOP() /* default slot -64(%rbp); a user expression is wrapped in parentheses */ +{ + ostringstream ret; + if ( red->topExpr == 0 ) + ret << "-64(%rbp)"; + else { + ret << "("; + INLINE_LIST( ret, red->topExpr, 0, false, false ); + ret << ")"; + } + return ret.str(); +} + +string AsmCodeGen::NFA_STACK() /* fixed frame slot, no user override */ +{ + return string( "-80(%rbp)" ); +} + +string AsmCodeGen::NFA_TOP() /* fixed frame slot, no user override */ +{ + return string( "-88(%rbp)" ); +} + +string AsmCodeGen::NFA_SZ() /* fixed frame slot, no user override */ +{ + return string( "-96(%rbp)" ); +} + +string AsmCodeGen::STACK() /* default slot -56(%rbp); a user expression is wrapped in parentheses */ +{ + ostringstream ret; + if ( red->stackExpr == 0 ) + ret << "-56(%rbp)"; + else { + ret << "("; + INLINE_LIST( ret, red->stackExpr, 0, false, false ); + ret << ")"; + } + return ret.str(); +} + +string AsmCodeGen::vEOF() /* user eof expression, else the default slot -8(%rbp) */ +{ + ostringstream ret; + if ( red->eofExpr == 0 ) + ret << "-8(%rbp)"; + else { + INLINE_LIST( ret, red->eofExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::TOKSTART() /* user tokstart expression, else the default slot -16(%rbp) */ +{ + ostringstream ret; + if ( red->tokstartExpr == 0 ) + 
ret << "-16(%rbp)"; + else { + INLINE_LIST( ret, red->tokstartExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::TOKEND() /* user tokend expression, else the default slot -24(%rbp) */ +{ + ostringstream ret; + if ( red->tokendExpr == 0 ) + ret << "-24(%rbp)"; + else { + INLINE_LIST( ret, red->tokendExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::ACT() /* user act expression, else the default slot -32(%rbp) */ +{ + ostringstream ret; + if ( red->actExpr == 0 ) + ret << "-32(%rbp)"; + else { + INLINE_LIST( ret, red->actExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::NBREAK() /* fixed frame slot; written with movb by the NBREAK emitter */ +{ + return string("-33(%rbp)"); +} + +string AsmCodeGen::GET_KEY() +{ + ostringstream ret; + if ( red->getKeyExpr != 0 ) { + /* Emit the user supplied method of retrieving the key. */ + ret << "("; + INLINE_LIST( ret, red->getKeyExpr, 0, false, false ); + ret << ")"; + } + else { + /* Expression for retrieving the key, use simple dereference. */ + ret << "(" << P() << ")"; + } + return ret.str(); +} + +string AsmCodeGen::COND_KEY( CondKey key ) /* formats the key value as an immediate operand ($N) */ +{ + ostringstream ret; + ret << "$" << key.getVal(); + return ret.str(); +} + + +/* Write out a key from the fsm code gen. Depends on whether or not the key is + * signed. (The body below emits getVal() as an immediate without checking signedness.) */ +string AsmCodeGen::KEY( Key key ) +{ + ostringstream ret; + ret << "$" << key.getVal(); + return ret.str(); +} + +bool AsmCodeGen::isAlphTypeSigned() +{ + return keyOps->isSigned; +} + +void AsmCodeGen::EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ) +{ + /* The parser gives fexec two children. The double brackets are for D + * code. If the inline list is a single word it will get interpreted as a + * C-style cast by the D compiler. 
*/ + + ret << + " subq $1, "; + INLINE_LIST( ret, item->children, targState, inFinish, false ); /* the child list is expanded twice: once as the subq operand, once as the movq source */ + ret << + "\n" + " movq "; + INLINE_LIST( ret, item->children, targState, inFinish, false ); + ret << ", " << P() << "\n"; +} + +void AsmCodeGen::LM_SWITCH( ostream &ret, GenInlineItem *item, + int targState, int inFinish, bool csForced ) +{ + long done = nextLmSwitchLabel++; /* one shared exit label for the whole switch */ + + ret << + " movq " << ACT() << ", %rax\n"; + + for ( GenInlineList::Iter lma = *item->children; lma.lte(); lma++ ) { + long l = nextLmSwitchLabel++; /* per-case skip label */ + + /* Write the case label, the action and the case break. */ + if ( lma->lmId < 0 ) { /* negative id: no comparison is emitted, so this case always runs when reached */ + } + else { + ret << + " cmpq $" << lma->lmId << ", %rax\n" + " jne " << LABEL( "lm_switch_next", l ) << "\n"; + } + + INLINE_LIST( ret, lma->children, targState, inFinish, csForced ); + + ret << + " jmp " << LABEL( "lm_done", done ) << "\n" + "" << LABEL( "lm_switch_next", l ) << ":\n"; + } + + ret << + "" << LABEL( "lm_done", done ) << ":\n"; +} + +void AsmCodeGen::SET_ACT( ostream &ret, GenInlineItem *item ) /* stores the item's lmId into the act location */ +{ + ret << + " movq $" << item->lmId << ", " << ACT() << "\n"; +} + +void AsmCodeGen::SET_TOKEND( ostream &ret, GenInlineItem *item ) +{ + /* Sets tokend, there may be an offset. 
*/ + ret << + " movq " << P() << ", %rax\n"; + + if ( item->offset != 0 ) { + ret << /* was written to out; every instruction of this sequence must go to the caller's stream */ + " addq $" << item->offset << ", %rax\n"; + } + + ret << /* was written to out, which split or reordered the sequence whenever ret is not out */ + " movq %rax, " << TOKEND() << "\n"; +} + +void AsmCodeGen::GET_TOKEND( ostream &ret, GenInlineItem *item ) /* loads tokend into %rax */ +{ + ret << + " movq " << TOKEND() << ", " << "%rax\n"; +} + +void AsmCodeGen::INIT_TOKSTART( ostream &ret, GenInlineItem *item ) /* zeroes tokstart */ +{ + ret << + " movq $0, " << TOKSTART() << "\n"; +} + +void AsmCodeGen::INIT_ACT( ostream &ret, GenInlineItem *item ) /* zeroes act */ +{ + ret << + " movq $0, " << ACT() << "\n"; +} + +void AsmCodeGen::SET_TOKSTART( ostream &ret, GenInlineItem *item ) /* copies p into tokstart */ +{ + ret << + " movq " << P() << ", " << TOKSTART() << "\n"; +} + +void AsmCodeGen::HOST_STMT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Expand the children in place; nothing is written around them. */ + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + } +} + +void AsmCodeGen::HOST_EXPR( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Expand the children in place; nothing is written around them. */ + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + } +} + +void AsmCodeGen::HOST_TEXT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Expand the children in place; nothing is written around them. */ + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + } +} + +void AsmCodeGen::GEN_STMT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Expand the children in place; nothing is written around them. */ + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + } +} + +void AsmCodeGen::GEN_EXPR( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. 
*/ + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + } +} + +void AsmCodeGen::LM_EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ) +{ + /* The parser gives fexec two children. The double brackets are for D code. + * If the inline list is a single word it will get interpreted as a C-style + * cast by the D compiler. This should be in the D code generator. */ + INLINE_LIST( ret, item->children, targState, inFinish, false ); + + ret << /* the code below reads %rax, so the expanded children are expected to leave their value there */ + " movq %rax, " << P() << "\n" + " subq $1, " << P() << "\n"; +} + +void AsmCodeGen::NBREAK( ostream &ret, int targState, bool csForced ) +{ + outLabelUsed = true; + ret << + " addq $1, " << P() << "\n"; + + if ( !csForced ) { /* cs is stored only when not forced */ + ret << + " movq $" << targState << ", " << vCS() << "\n"; + } + + ret << /* set the nbreak byte flag, then leave through the pop label */ + " movb $1, " << NBREAK() << "\n" + " jmp " << LABEL( "pop" ) << "\n"; +} + +/* Write out an inline tree structure. Walks the list and possibly calls out + * to virtual functions that handle language specific items in the tree. 
*/ +void AsmCodeGen::INLINE_LIST( ostream &ret, GenInlineList *inlineList, + int targState, bool inFinish, bool csForced ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { /* one emitter per item type; everything is written to ret */ + case GenInlineItem::Text: + ret << item->data; /* literal text is copied through unchanged */ + break; + case GenInlineItem::Goto: + GOTO( ret, item->targState->id, inFinish ); + break; + case GenInlineItem::Call: + CALL( ret, item->targState->id, targState, inFinish ); + break; + case GenInlineItem::Next: + NEXT( ret, item->targState->id, inFinish ); + break; + case GenInlineItem::Ret: + RET( ret, inFinish ); + break; + case GenInlineItem::PChar: + ret << P(); + break; + case GenInlineItem::Char: + ret << GET_KEY(); + break; + case GenInlineItem::Hold: /* same instruction as LmHold further down */ + ret << + " subq $1, " << P() << "\n"; + break; + case GenInlineItem::Exec: + EXEC( ret, item, targState, inFinish ); + break; + case GenInlineItem::Curs: + CURS( ret, inFinish ); + break; + case GenInlineItem::Targs: + TARGS( ret, inFinish, targState ); + break; + case GenInlineItem::Entry: + ret << item->targState->id; /* written as a bare number, without a $ prefix */ + break; + case GenInlineItem::GotoExpr: + GOTO_EXPR( ret, item, inFinish ); + break; + case GenInlineItem::CallExpr: + CALL_EXPR( ret, item, targState, inFinish ); + break; + case GenInlineItem::NextExpr: + NEXT_EXPR( ret, item, inFinish ); + break; + case GenInlineItem::LmSwitch: + LM_SWITCH( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::LmSetActId: + SET_ACT( ret, item ); + break; + case GenInlineItem::LmSetTokEnd: + SET_TOKEND( ret, item ); + break; + case GenInlineItem::LmGetTokEnd: + GET_TOKEND( ret, item ); + break; + case GenInlineItem::LmInitTokStart: + INIT_TOKSTART( ret, item ); + break; + case GenInlineItem::LmInitAct: + INIT_ACT( ret, item ); + break; + case GenInlineItem::LmSetTokStart: + SET_TOKSTART( ret, item ); + break; + case GenInlineItem::Break: + BREAK( ret, targState, csForced ); + break; + /* Stubbed. 
*/ + case GenInlineItem::Ncall: + NCALL( ret, item->targState->id, targState, inFinish ); + break; + case GenInlineItem::NcallExpr: + NCALL_EXPR( ret, item, targState, inFinish ); + break; + case GenInlineItem::Nret: + NRET( ret, inFinish ); + break; + case GenInlineItem::Nbreak: + NBREAK( ret, targState, csForced ); + break; + case GenInlineItem::LmCase: /* emits nothing here */ + break; + + case GenInlineItem::LmExec: + LM_EXEC( ret, item, targState, inFinish ); + break; + + case GenInlineItem::LmHold: + ret << + " subq $1, " << P() << "\n"; + break; + case GenInlineItem::NfaClear: /* resets the NFA stack top to zero */ + ret << + " movq $0, " << NFA_TOP() << "\n"; + break; + + case GenInlineItem::HostStmt: + HOST_STMT( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::HostExpr: + HOST_EXPR( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::HostText: + HOST_TEXT( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::GenStmt: + GEN_STMT( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::GenExpr: + GEN_EXPR( ret, item, targState, inFinish, csForced ); + break; + /* Handled at the top level. */ + case GenInlineItem::NfaWrapAction: + case GenInlineItem::NfaWrapConds: + break; + } + } +} +/* Write out paths in line directives. Escapes backslashes only; every other character is copied as-is. */ +string AsmCodeGen::LDIR_PATH( char *path ) +{ + ostringstream ret; + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + ret << "\\\\"; + else + ret << *pc; + } + return ret.str(); +} + +void AsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, + bool inFinish, bool csForced ) +{ + /* Write the preprocessor line info for going into the source file. */ + asmLineDirective( ret, action->loc.fileName, action->loc.line ); + + /* Write the block and close it off. 
*/ + INLINE_LIST( ret, action->inlineList, targState, inFinish, csForced ); +} + +void AsmCodeGen::CONDITION( ostream &ret, GenAction *condition ) +{ + ret << "\n"; + asmLineDirective( ret, condition->loc.fileName, condition->loc.line ); + INLINE_LIST( ret, condition->inlineList, 0, false, false ); +} + +bool singleItem( GenAction *action, GenInlineItem::Type type ) +{ + return action->inlineList->length() == 1 && + action->inlineList->head->type == type; +} + +void AsmCodeGen::NFA_CONDITION( ostream &ret, GenAction *condition, bool last ) +{ + if ( singleItem( condition, GenInlineItem::NfaWrapAction ) ) + { + GenAction *action = condition->inlineList->head->wrappedAction; + ACTION( out, action, 0, false, false ); + } + else if ( singleItem( condition, GenInlineItem::NfaWrapConds ) ) + { + GenCondSpace *condSpace = condition->inlineList->head->condSpace; + const CondKeySet &condKeySet = condition->inlineList->head->condKeySet; + + out << " movq $0, %r9\n"; + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << + " pushq %r9\n"; + + CONDITION( out, *csi ); + out << + "\n" + " test %eax, %eax\n" + " setne %cl\n" + " movsbq %cl, %rcx\n" + " salq $" << csi.pos() << ", %rcx\n" + " popq %r9\n" + " addq %rcx, %r9\n"; + } + + for ( int c = 0; c < condKeySet.length(); c++ ) { + CondKey key = condKeySet[c]; + out << + " cmpq " << COND_KEY( key ) << ", %r9\n" + " je 102f\n"; + } + + out << + " jmp " << LABEL( "pop_fail" ) << "\n" + "102:\n"; + } + else { + CONDITION( ret, condition ); + out << + " test %eax, %eax\n" + " jz " << LABEL( "pop_fail" ) << "\n"; + } +} + +string AsmCodeGen::ERROR_STATE() +{ + ostringstream ret; + if ( redFsm->errState != 0 ) + ret << redFsm->errState->id; + else + ret << "-1"; + return ret.str(); +} + +string AsmCodeGen::FIRST_FINAL_STATE() +{ + ostringstream ret; + if ( redFsm->firstFinState != 0 ) + ret << redFsm->firstFinState->id; + else + ret << redFsm->nextStateId; + return ret.str(); +} + +void 
AsmCodeGen::writeInit() +{ + if ( !noCS ) { + /* Don't use vCS here. vCS may assume CS needs to be on the stack. + * Just use the interface register. */ + out << + " movq $" << redFsm->startState->id << ", %r11\n"; + } + + if ( redFsm->anyNfaStates() ) { /* NFA stack top is initialised only when the machine has NFA states */ + out << + " movq $0, " << NFA_TOP() << "\n"; + } + + /* If there are any calls, then the stack top needs initialization. */ + if ( redFsm->anyActionCalls() || redFsm->anyActionRets() ) { + out << + " movq $0, " << TOP() << "\n"; + } + + if ( red->hasLongestMatch ) { /* scanner variables are zeroed only for longest-match machines */ + out << + " movq $0, " << TOKSTART() << "\n" + " movq $0, " << TOKEND() << "\n" + " movq $0, " << ACT() << "\n"; + } +} + +string AsmCodeGen::DATA_PREFIX() /* the fsm name plus an underscore, or empty when noPrefix is set */ +{ + if ( !noPrefix ) + return FSM_NAME() + "_"; + return ""; +} + +/* Emit the alphabet data type. */ +string AsmCodeGen::ALPH_TYPE() +{ + string ret = alphType->data1; + if ( alphType->data2 != 0 ) { /* optional second word of the type name */ + ret += " "; + ret += + alphType->data2; + } + return ret; +} + +void AsmCodeGen::STATIC_CONST_INT( const string &name, const string &value ) /* NOTE(review): .size declares 8 bytes but .long is a 32-bit directive in GNU as - confirm whether .quad was intended */ +{ + out << + " .align 8\n" + " .type " << name << ", @object\n" + " .size " << name << ", 8\n" << + name << ":\n" + " .long " << value << "\n"; +} + +void AsmCodeGen::STATE_IDS() +{ + if ( redFsm->startState != 0 ) + STATIC_CONST_INT( START(), START_STATE_ID() ); + + if ( !noFinal ) + STATIC_CONST_INT( FIRST_FINAL(), FIRST_FINAL_STATE() ); + + if ( !noError ) + STATIC_CONST_INT( ERROR(), ERROR_STATE() ); + + out << "\n"; + + if ( red->entryPointNames.length() > 0 ) { + for ( EntryNameVect::Iter en = red->entryPointNames; en.lte(); en++ ) { + ostringstream ret; + ret << redFsm->startState->id; /* NOTE(review): every en_ constant receives the start state id, and startState is not null-checked here as it is above - confirm whether a per-entry id was intended */ + + STATIC_CONST_INT( string( DATA_PREFIX() + "en_" + *en ), + ret.str() ); + } + out << "\n"; + } +} + +void AsmCodeGen::writeStart() +{ + out << START_STATE_ID(); +} + +void AsmCodeGen::writeFirstFinal() +{ + out << FIRST_FINAL_STATE(); +} + +void AsmCodeGen::writeError() +{ + out << ERROR_STATE(); +} + +string AsmCodeGen::PTR_CONST() +{ + return "const "; +} + 
+string AsmCodeGen::PTR_CONST_END() +{ + return ""; +} + +std::ostream &AsmCodeGen::OPEN_ARRAY( string type, string name ) /* writes a C-style static const array opener to out */ +{ + out << "static const " << type << " " << name << "[] = {\n"; + return out; +} + +std::ostream &AsmCodeGen::CLOSE_ARRAY() +{ + return out << "};\n"; +} + +std::ostream &AsmCodeGen::STATIC_VAR( string type, string name ) +{ + out << "static const " << type << " " << name; + return out; +} + +string AsmCodeGen::UINT( ) +{ + return "unsigned int"; +} + +string AsmCodeGen::ARR_OFF( string ptr, string offset ) +{ + return ptr + " + " + offset; +} + +string AsmCodeGen::CAST( string type ) +{ + return "(" + type + ")"; +} + +string AsmCodeGen::NULL_ITEM() +{ + return "0"; +} + +string AsmCodeGen::POINTER() +{ + return " *"; +} + +std::ostream &AsmCodeGen::SWITCH_DEFAULT() /* writes nothing; just returns out */ +{ + return out; +} + +string AsmCodeGen::CTRL_FLOW() +{ + return ""; +} + +void AsmCodeGen::writeExports() /* one #define per exported key, named prefix + ex_ + export name */ +{ + if ( red->exportList.length() > 0 ) { + for ( ExportList::Iter ex = red->exportList; ex.lte(); ex++ ) { + out << "#define " << DATA_PREFIX() << "ex_" << ex->name << " " << + KEY(ex->key) << "\n"; + } + out << "\n"; + } +} + +string AsmCodeGen::LABEL( const char *type, long i ) /* .L<machineId>_<type>_<i> */ +{ + std::stringstream s; + s << ".L" << red->machineId << "_" << type << "_" << i; + return s.str(); +} + +string AsmCodeGen::LABEL( const char *name ) /* .L<machineId>_<name> */ +{ + std::stringstream s; + s << ".L" << red->machineId << "_" << name; + return s.str(); +} + +void AsmCodeGen::emitSingleIfElseIf( RedStateAp *state ) +{ + /* Load up the singles. */ + int numSingles = state->outSingle.length(); + RedTransEl *data = state->outSingle.data; + + /* Write out the single indices. 
*/ + for ( int j = 0; j < numSingles; j++ ) { + out << + " cmpb " << KEY( data[j].lowKey ) << ", %r10b\n" + " je " << TRANS_GOTO_TARG( data[j].value ) << "\n"; + } +} + +void AsmCodeGen::emitSingleJumpTable( RedStateAp *state, string def ) +{ + int numSingles = state->outSingle.length(); + RedTransEl *data = state->outSingle.data; + + long long low = data[0].lowKey.getVal(); /* indexes data[0] and data[numSingles-1] unchecked, so outSingle must be non-empty */ + long long high = data[numSingles-1].lowKey.getVal(); + + if ( def.size() == 0 ) /* no default given: out-of-range keys go to the sjf label emitted at the end */ + def = LABEL( "sjf", state->id ); + + out << /* table entries are 32-bit offsets relative to the sjt label, hence movslq followed by addq of the table address */ + " movzbq %r10b, %rax\n" + " subq $" << low << ", %rax\n" + " cmpq $" << (high - low) << ", %rax\n" + " ja " << def << "\n" + " leaq " << LABEL( "sjt", state->id ) << "(%rip), %rcx\n" + " movslq (%rcx,%rax,4), %rdx\n" + " addq %rcx, %rdx\n" + " jmp *%rdx\n" + " .section .rodata\n" + " .align 4\n" + << LABEL( "sjt", state->id ) << ":\n"; + + for ( long long j = 0; j < numSingles; j++ ) { + /* Fill in gap between prev and this. */ + if ( j > 0 ) { + long long span = keyOps->span( data[j-1].lowKey, data[j].lowKey ) - 2; + for ( long long k = 0; k < span; k++ ) { /* gap slots point at the default target */ + out << " .long " << def << " - " << + LABEL( "sjt", state->id ) << "\n"; + } + } + + out << " .long " << TRANS_GOTO_TARG( data[j].value ) << " - " << + LABEL( "sjt", state->id ) << "\n"; + } + + out << /* switch back to code; the sjf label is always emitted, even when def was supplied */ + " .text\n" + "" << LABEL( "sjf", state->id ) << ":\n"; +} + + +void AsmCodeGen::emitRangeBSearch( RedStateAp *state, int low, int high ) +{ + static int nl = 1; /* function-static counter for the nl labels, shared by every call */ + + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; + RedTransEl *data = state->outRange.data; + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet. 
*/ + bool limitLow = keyOps->eq( data[mid].lowKey, keyOps->minKey ); + bool limitHigh = keyOps->eq( data[mid].highKey, keyOps->maxKey ); + +// string nf = TRANS_GOTO_TARG( state->defTrans ); + + /* For some reason the hop is faster and results in smaller code. Not sure + * why. */ + string nf = LABEL( "nf", state->id ); + + if ( anyLower && anyHigher ) { + int l1 = nl++; + string targ = TRANS_GOTO_TARG( data[mid].value ); + + /* Can go lower and higher than mid. */ + out << + " cmpb " << KEY( data[mid].lowKey ) << ", %r10b\n" + " jge " << LABEL( "nl", l1 ) << "\n"; + + + emitRangeBSearch( state, low, mid-1 ); /* lower half, emitted on the fall-through path */ + + out << + LABEL( "nl", l1 ) << ":\n"; + + if ( !keyOps->eq( data[mid].lowKey, data[mid].highKey ) ) { /* for a one-key range the jle below reuses the flags of the cmpb against lowKey */ + out << + " cmpb " << KEY ( data[mid].highKey ) << ", %r10b\n"; + } + + out << + " jle " << targ << "\n"; + + emitRangeBSearch( state, mid+1, high ); /* upper half */ + } + else if ( anyLower && !anyHigher ) { + + string targ; + if ( limitHigh ) /* range reaches the alphabet maximum: jump straight to the transition target */ + targ = TRANS_GOTO_TARG( data[mid].value ); + else + targ = LABEL( "nl", nl++ ); + + /* Can go lower than mid but not higher. */ + + out << + " cmpb " << KEY( data[mid].lowKey ) << ", %r10b\n" + " jge " << targ << "\n"; + + emitRangeBSearch( state, low, mid-1 ); + + /* If the higher is the highest in the alphabet then there is no sense + * testing it. */ + if ( !limitHigh ) { + + out << + targ << ":\n"; + + if ( ! keyOps->eq( data[mid].lowKey, data[mid].highKey ) ) { + out << + " cmpb " << KEY ( data[mid].highKey ) << ", %r10b\n"; + } + + out << + " jg " << nf << "\n"; + + TRANS_GOTO( data[mid].value ); + } + } + else if ( !anyLower && anyHigher ) { + string targ; + if ( limitLow ) /* range starts at the alphabet minimum: jump straight to the transition target */ + targ = TRANS_GOTO_TARG( data[mid].value ); + else + targ = LABEL( "nl", nl++ ); + + /* Can go higher than mid but not lower. 
*/ + + out << + " cmpb " << KEY( data[mid].highKey ) << ", %r10b\n" + " jle " << targ << "\n"; + + emitRangeBSearch( state, mid+1, high ); /* upper half, emitted on the fall-through path */ + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( !limitLow ) { + + out << + targ << ":\n"; + + if ( !keyOps->eq( data[mid].lowKey, data[mid].highKey ) ) { /* for a one-key range the jl below reuses the flags of the cmpb against highKey */ + out << + " cmpb " << KEY( data[mid].lowKey ) << ", %r10b\n"; + } + + out << + " jl " << nf << "\n"; + + TRANS_GOTO( data[mid].value ); + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. */ + if ( !limitLow && !limitHigh ) { + + if ( !keyOps->eq( data[mid].lowKey, data[mid].highKey ) ) { /* proper range: test both ends */ + out << + " cmpb " << KEY( data[mid].lowKey ) << ", %r10b\n" + " jl " << nf << "\n" + " cmpb " << KEY( data[mid].highKey ) << ", %r10b\n" + " jg " << nf << "\n"; + } + else { /* one-key range: a single equality test */ + out << + " cmpb " << KEY( data[mid].lowKey ) << ", %r10b\n" + " jne " << nf << "\n"; + } + + TRANS_GOTO( data[mid].value ); + } + else if ( limitLow && !limitHigh ) { /* only the upper bound needs testing */ + + out << + " cmpb " << KEY( data[mid].highKey ) << ", %r10b\n" + " jg " << nf << "\n"; + + TRANS_GOTO( data[mid].value ); + } + else if ( !limitLow && limitHigh ) { /* only the lower bound needs testing */ + + out << + " cmpb " << KEY( data[mid].lowKey ) << ", %r10b\n" + " jl " << nf << "\n"; + + TRANS_GOTO( data[mid].value ); + } + else { + /* Both high and low are at the limit. No tests to do. 
*/ + TRANS_GOTO( data[mid].value ); + } + } +} + +void AsmCodeGen::emitCharClassIfElseIf( RedStateAp *st ) /* one cmpb/je pair for each class from st->low to st->high */ +{ + long long span = st->high - st->low + 1; + for ( long long pos = 0; pos < span; pos++ ) { + out << + " cmpb " << KEY( st->low + pos ) << ", %r10b\n" + " je " << TRANS_GOTO_TARG( st->transList[pos] ) << "\n"; + } +} + +void AsmCodeGen::emitCharClassJumpTable( RedStateAp *st, string def ) +{ + long long low = st->low; + long long high = st->high; + + if ( def.size() == 0 ) /* no default given: out-of-range classes go to the ccf label emitted at the end */ + def = LABEL( "ccf", st->id ); + + out << /* table entries are 32-bit offsets relative to the cct label */ + " movzbq %r10b, %rax\n" + " subq $" << low << ", %rax\n" + " cmpq $" << (high - low) << ", %rax\n" + " ja " << def << "\n" + " leaq " << LABEL( "cct", st->id ) << "(%rip), %rcx\n" + " movslq (%rcx,%rax,4), %rdx\n" + " addq %rcx, %rdx\n" + " jmp *%rdx\n" + " .section .rodata\n" + " .align 4\n" + << LABEL( "cct", st->id ) << ":\n"; + + long long span = st->high - st->low + 1; + for ( long long pos = 0; pos < span; pos++ ) { /* one table slot per transList entry, with no gap filling */ + out << " .long " << TRANS_GOTO_TARG( st->transList[pos] ) << " - " << + LABEL( "cct", st->id ) << "\n"; + } + + out << + " .text\n" + "" << LABEL( "ccf", st->id ) << ":\n"; +} + +void AsmCodeGen::NFA_PUSH( RedStateAp *st ) +{ + if ( st->nfaTargs != 0 && st->nfaTargs->length() > 0 ) { + if ( red->nfaPrePushExpr != 0 ) { /* the pre-push code is emitted after the target count is loaded into %rdi */ + out << " movq $" << st->nfaTargs->length() << ", %rdi\n"; + INLINE_LIST( out, red->nfaPrePushExpr->inlineList, 0, false, false ); + } + + for ( RedNfaTargs::Iter t = *st->nfaTargs; t.lte(); t++ ) { + out << /* each stack entry is 24 bytes: state id at offset 0, p at 8, pop action id at 16 */ + " movq " << NFA_STACK() << ", %rax\n" + " movq " << NFA_TOP() << ", %rcx\n" + " imulq $24, %rcx\n" + " movq $" << t->state->id << ", 0(%rax,%rcx,)\n" + " movq " << P() << ", 8(%rax,%rcx,)\n"; + + out << + " # pop action id " << t->id << "\n" + " movq $" << t->id << ", 16(%rax,%rcx,)\n"; + + if ( t->push ) { /* optional push actions for this target */ + for ( GenActionTable::Iter item = t->push->key; item.lte(); item++ ) { + ACTION( out, item->value, st->id, false, + t->push->anyNextStmt() ); + out << "\n"; + } + } + + out << + " movq " << 
NFA_TOP() << ", %rcx\n" + " addq $1, %rcx\n" + " movq %rcx, " << NFA_TOP() << "\n"; + } + } +} + +void AsmCodeGen::STATE_GOTOS() +{ + bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates(); /* decided once for the whole machine, not per state */ + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Writing code above state gotos. */ + IN_TRANS_ACTIONS( st ); + + if ( st->labelNeeded ) + out << LABEL( "st", st->id ) << ":\n"; + + + /* need to do this if the transition is an eof transition, or if the action + * contains fexec. Otherwise, no need. */ + if ( eof ) { + out << + " cmpq " << P() << ", " << vEOF() << "\n"; + + if ( st->isFinal ) /* final states leave through their out label, others through pop */ + out << " je " << LABEL( "out", st->id ) << "\n"; + else + out << " je " << LABEL( "pop", st->id ) << "\n"; + } + + if ( st->toStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) { + ACTION( out, item->value, st->id, false, + st->toStateAction->anyNextStmt() ); + out << "\n"; + } + } + + if ( st == redFsm->errState ) { /* the error state only stores cs and jumps to pop */ + out << LABEL( "en", st->id ) << ":\n"; + + /* Break out here. */ + outLabelUsed = true; + + out << + " movq $" << st->id << ", " << vCS() << "\n" + " jmp " << LABEL( "pop" ) << "\n"; + } + else { + /* Advance and test buffer pos. */ + if ( st->labelNeeded ) { /* the advance sits between the st and en labels, so entering at en skips it */ + out << + " addq $1, " << P() << "\n"; + + } + + /* This is the entry label for starting a run. */ + out << LABEL( "en", st->id ) << ":\n"; + + if ( !noEnd ) { + if ( eof ) { /* at p == pe leave only when p != eof; otherwise fall through to the eof handling below */ + out << + " cmpq " << P() << ", " << PE() << "\n" + " jne " << LABEL( "nope", st->id ) << "\n" << + " cmpq " << P() << ", " << vEOF() << "\n" + " jne " << LABEL( "out", st->id ) << "\n" << + LABEL( "nope", st->id ) << ":\n"; + } + else { + out << + " cmpq " << P() << ", " << PE() << "\n" + " je " << LABEL( "out", st->id ) << "\n"; + } + } + + NFA_PUSH( st ); + + if ( st->fromStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. 
*/ + for ( GenActionTable::Iter item = st->fromStateAction->key; + item.lte(); item++ ) + { + ACTION( out, item->value, st->id, false, + st->fromStateAction->anyNextStmt() ); + out << "\n"; + } + } + + if ( !noEnd && eof ) { + out << + " cmpq " << P() << ", " << vEOF() << "\n" + " jne " << LABEL( "neofd", st->id ) << "\n"; + + if ( st->eofTrans != 0 ) + TRANS_GOTO( st->eofTrans ); + else { + if ( st->isFinal || !redFsm->anyNfaStates() ) + out << "jmp " << LABEL( "out", st->id ) << "\n"; + else + out << "jmp " << LABEL( "pop", st->id ) << "\n"; + } + + out << + " jmp " << LABEL( "deofd", st->id ) << "\n"; + + out << LABEL( "neofd", st->id ) << ":\n"; + } + + /* Record the prev state if necessary. */ + if ( st->anyRegCurStateRef() ) { + out << + " movq $" << st->id << ", -72(%rbp)\n"; + } + + +#ifdef LOG_TRANS + out << + " movzbl (" << P() << "), %r10d\n" + " movq $" << machineId << ", %rdi\n" + " movq $" << st->id << ", %rsi\n" + " movslq %r10d, %rdx\n" + " call " << LABEL( "log_trans" ) << "\n" + ; +#endif + + /* Load *p. */ + if ( st->transList != 0 ) { + long lowKey = redFsm->lowKey.getVal(); + long highKey = redFsm->highKey.getVal(); + + out << + " movzbl (" << P() << "), %r10d\n" + " cmpl $" << lowKey << ", %r10d\n" + " jl " << LABEL( "nf", st->id ) << "\n" + " cmpl $" << highKey << ", %r10d\n" + " jg " << LABEL( "nf", st->id ) << "\n" + " subl " << KEY( lowKey ) << ", %r10d\n" + " leaq " << LABEL( "char_class" ) << "(%rip), %rcx\n" + " movslq %r10d, %rax\n" + " movb (%rcx, %rax), %r10b\n" + ; + + + long len = ( st->high - st->low + 1 ); + + if ( len < 8 ) + emitCharClassIfElseIf( st ); + else { + string def; + if ( st->outRange.length() == 0 ) + def = TRANS_GOTO_TARG( st->defTrans ); + emitCharClassJumpTable( st, def ); + } + } + + /* Write the default transition. 
*/ + out << LABEL( "nf", st->id ) << ":\n"; + TRANS_GOTO( st->defTrans ); + + if ( !noEnd && eof ) { + out << LABEL( "deofd", st->id) << ":\n"; + } + } + } +} + +unsigned int AsmCodeGen::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + return act; +} + +unsigned int AsmCodeGen::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + return act; +} + +unsigned int AsmCodeGen::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->location+1; + return act; +} + +bool AsmCodeGen::useAgainLabel() +{ + return redFsm->anyActionRets() || + redFsm->anyActionByValControl() || + redFsm->anyRegNextStmt(); +} + +void AsmCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << + " jmp " << LABEL( "st", gotoDest ) << "\n"; +} + +void AsmCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + if ( red->prePushExpr != 0 ) + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + + ret << + " movq " << STACK() << ", %rax\n" + " movq " << TOP() << ", %rcx\n" + " movq $" << targState << ", (%rax, %rcx, 8)\n" + " addq $1, %rcx\n" + " movq %rcx, " << TOP() << "\n" + ; + + ret << + " jmp " << LABEL( "st", callDest ) << "\n"; + ; +} + +void AsmCodeGen::CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + if ( red->prePushExpr != 0 ) + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + + ret << + "\n" + " movq "; + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << ", %rdx\n" + "\n" + " movq " << STACK() << ", %rax\n" + " movq " << TOP() << ", %rcx\n" + " movq $" << targState << ", (%rax, %rcx, 8)\n" + " addq $1, %rcx\n" + " movq %rcx, " << TOP() << "\n" + " movq %rdx, " << vCS() << "\n" + ; + + ret << + " jmp " << LABEL( "again" ) << "\n"; +} + +void 
AsmCodeGen::RET( ostream &ret, bool inFinish ) +{ + ret << + " movq " << STACK() << ", %rax\n" + " movq " << TOP() << ", %rcx\n" + " subq $1, %rcx\n" + " movq (%rax, %rcx, 8), %rax\n" + " movq %rax, " << vCS() << "\n" + " movq %rcx, " << TOP() << "\n"; + + if ( red->postPopExpr != 0 ) + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + + ret << + " jmp " << LABEL("again") << "\n"; +} + +void AsmCodeGen::GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << " movq "; + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << ", " << vCS() << "\n"; + + ret << + " jmp " << LABEL("again") << "\n"; +} + +void AsmCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << + " movq $" << nextDest << ", " << vCS() << "\n"; +} + +void AsmCodeGen::NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << " movq "; + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << ", " << vCS() << "\n"; +} + +void AsmCodeGen::NCALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + if ( red->prePushExpr != 0 ) + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + + ret << + " movq " << STACK() << ", %rax\n" + " movq " << TOP() << ", %rcx\n" + " movq $" << targState << ", (%rax, %rcx, 8)\n" + " addq $1, %rcx\n" + " movq %rcx, " << TOP() << "\n" + " movq $" << callDest << ", " << vCS() << "\n"; +} + +void AsmCodeGen::NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, + int targState, bool inFinish ) +{ + if ( red->prePushExpr != 0 ) + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + + ret << + "\n" + " movq "; + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << ", %rdx\n" + "\n" + " movq " << STACK() << ", %rax\n" + " movq " << TOP() << ", %rcx\n" + " movq $" << targState << ", (%rax, %rcx, 8)\n" + " addq $1, %rcx\n" + " movq %rcx, " << TOP() << "\n" + " movq %rdx, " << vCS() << "\n"; +} + +void AsmCodeGen::NRET( ostream &ret, bool 
inFinish ) +{ + ret << + " movq " << STACK() << ", %rax\n" + " movq " << TOP() << ", %rcx\n" + " subq $1, %rcx\n" + " movq (%rax, %rcx, 8), %rax\n" + " movq %rax, " << vCS() << "\n" + " movq %rcx, " << TOP() << "\n"; + + if ( red->postPopExpr != 0 ) + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); +} + +void AsmCodeGen::CURS( ostream &ret, bool inFinish ) +{ + ret << + " movq -72(%rbp), %rax\n"; +} + +void AsmCodeGen::TARGS( ostream &ret, bool inFinish, int targState ) +{ + ret << + " movq $" << targState << ", %rax\n"; +} + +void AsmCodeGen::BREAK( ostream &ret, int targState, bool csForced ) +{ + outLabelUsed = true; + ret << "{" << P() << "++; "; + if ( !csForced ) + ret << vCS() << " = " << targState << "; "; + ret << CTRL_FLOW() << "goto _out;}"; +} + +bool AsmCodeGen::IN_TRANS_ACTIONS( RedStateAp *state ) +{ + bool anyWritten = false; + + /* Emit any transitions that have actions and that go to this state. */ + for ( int it = 0; it < state->numInCondTests; it++ ) { + /* Write the label for the transition so it can be jumped to. */ + RedTransAp *trans = state->inCondTests[it]; + out << LABEL( "ctr", trans->id ) << ":\n"; + + if ( trans->condSpace->condSet.length() == 1 ) { + RedCondPair *tp, *fp; + if ( trans->numConds() == 1 ) { + /* The single condition is either false or true, errCond is the + * opposite. */ + if ( trans->outCondKey(0) == 0 ) { + fp = trans->outCond(0); + tp = trans->errCond(); + } + else { + tp = trans->outCond(0); + fp = trans->errCond(); + } + } + else { + /* Full list, goes false, then true. 
*/ + fp = trans->outCond(0); + tp = trans->outCond(1); + } + + GenCondSet::Iter csi = trans->condSpace->condSet; + CONDITION( out, *csi ); + + out << + " test %eax, %eax\n" + " je " << TRANS_GOTO_TARG( fp ) << "\n" + " jmp " << TRANS_GOTO_TARG( tp ) << "\n"; + } + else { + out << " movq $0, %r9\n"; + + for ( GenCondSet::Iter csi = trans->condSpace->condSet; csi.lte(); csi++ ) { + out << + " pushq %r9\n"; + + CONDITION( out, *csi ); + out << + "\n" + " test %eax, %eax\n" + " setne %cl\n" + " movsbq %cl, %rcx\n" + " salq $" << csi.pos() << ", %rcx\n" + " popq %r9\n" + " addq %rcx, %r9\n"; + } + + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + RedCondPair *pair = trans->outCond( c ); + out << + " cmpq " << COND_KEY( key ) << ", %r9\n" + " je " << TRANS_GOTO_TARG( pair ) << "\n"; + + } + + RedCondPair *err = trans->errCond(); + if ( err != 0 ) { + out << + " jmp " << TRANS_GOTO_TARG( err ) << "\n"; + } + } + } + + /* Emit any transitions that have actions and that go to this state. */ + for ( int it = 0; it < state->numInConds; it++ ) { + RedCondPair *pair = state->inConds[it]; + if ( pair->action != 0 /* && pair->labelNeeded */ ) { + /* Remember that we wrote an action so we know to write the + * line directive for going back to the output. */ + anyWritten = true; + + /* Write the label for the transition so it can be jumped to. */ + out << LABEL( "tr", pair->id ) << ":\n"; + + /* If the action contains a next, then we must preload the current + * state since the action may or may not set it. */ + if ( pair->action->anyNextStmt() ) { + out << + " movq $" << pair->targ->id << ", " << vCS() << "\n"; + } + + if ( redFsm->anyRegNbreak() ) { + out << + " movb $0, " << NBREAK() << "\n"; + } + + /* Write each action in the list. 
*/ + for ( GenActionTable::Iter item = pair->action->key; item.lte(); item++ ) { + ACTION( out, item->value, pair->targ->id, false, + pair->action->anyNextStmt() ); + out << "\n"; + } + + if ( redFsm->anyRegNbreak() ) { + out << + " cmpb $0, " << NBREAK() << "\n" + " jne " << LABEL( "pop" ) << "\n"; + outLabelUsed = true; + } + + + /* If the action contains a next then we need to reload, otherwise + * jump directly to the target state. */ + if ( pair->action->anyNextStmt() ) + out << " jmp " << LABEL( "again" ) << "\n"; + else + out << " jmp " << LABEL( "st", pair->targ->id ) << "\n"; + } + } + + return anyWritten; +} + +std::string AsmCodeGen::TRANS_GOTO_TARG( RedCondPair *pair ) +{ + std::stringstream s; + if ( pair->action != 0 ) { + /* Go to the transition which will go to the state. */ + s << LABEL( "tr", pair->id ); + } + else { + /* Go directly to the target state. */ + s << LABEL( "st", pair->targ->id ); + } + return s.str(); +} + +std::string AsmCodeGen::TRANS_GOTO_TARG( RedTransAp *trans ) +{ + if ( trans->condSpace != 0 ) { + /* Need to jump to the trans since there are conditions. */ + return LABEL( "ctr", trans->id ); + } + else { + return TRANS_GOTO_TARG( &trans->p ); + } +} + +/* Emit the goto to take for a given transition. */ +std::ostream &AsmCodeGen::TRANS_GOTO( RedTransAp *trans ) +{ + out << " jmp " << TRANS_GOTO_TARG( trans ) << "\n"; + return out; +} + +std::ostream &AsmCodeGen::EXIT_STATES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + out << + LABEL( "out", st->id ) << ":\n" + " movq $" << st->id << ", " << vCS() << "\n" + " jmp " << LABEL( "out" ) << "\n"; + + out << + LABEL( "pop", st->id ) << ":\n" + " movq $" << st->id << ", " << vCS() << "\n" + " jmp " << LABEL( "pop" ) << "\n"; + } + return out; +} + +std::ostream &AsmCodeGen::AGAIN_CASES() +{ + /* Jump into the machine based on the current state. 
*/ + out << + " leaq " << LABEL( "again_jmp" ) << "(%rip), %rcx\n"; + + if ( stackCS ) { + out << + " movq " << vCS() << ", %r11\n"; + } + + out << + " movq (%rcx,%r11,8), %rcx\n" + " jmp *%rcx\n" + " .section .rodata\n" + " .align 8\n" + << LABEL( "again_jmp" ) << ":\n"; + + for ( int stId = 0; stId < redFsm->stateList.length(); stId++ ) { + out << + " .quad " << LABEL( "st", stId ) << "\n"; + } + + out << + " .text\n"; + + return out; +} + +std::ostream &AsmCodeGen::ENTRY_CASES() +{ + out << + " movq (%rcx,%r11,8), %rcx\n" + " jmp *%rcx\n" + " .section .rodata\n" + " .align 8\n" + << LABEL( "entry_jmp" ) << ":\n"; + + for ( int stId = 0; stId < redFsm->stateList.length(); stId++ ) { + out << + " .quad " << LABEL( "en", stId ) << "\n"; + } + + out << + " .text\n"; + return out; +} + + +std::ostream &AsmCodeGen::FINISH_CASES() +{ + /* The current state is in %rax. */ + /*long done = */ nextLmSwitchLabel++; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + out << + " cmpq $" << st->id << ", %rax\n" + " jne " << LABEL( "fc", st->id ) << "\n"; + + if ( st->fromStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + for ( GenActionTable::Iter item = st->fromStateAction->key; + item.lte(); item++ ) + { + ACTION( out, item->value, st->id, false, + st->fromStateAction->anyNextStmt() ); + out << "\n"; + } + } + + out << + " jmp " << TRANS_GOTO_TARG( st->eofTrans ) << "\n" << + LABEL( "fc", st->id ) << ":\n"; + } + } + + return out; +} + +void AsmCodeGen::setLabelsNeeded( GenInlineList *inlineList ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case GenInlineItem::Goto: case GenInlineItem::Call: { + /* Mark the target as needing a label. 
*/ + item->targState->labelNeeded = true; + break; + } + default: break; + } + + if ( item->children != 0 ) + setLabelsNeeded( item->children ); + } +} + +void AsmCodeGen::setLabelsNeeded( RedCondPair *pair ) +{ + /* If there is no action with a next statement, then the label will be + * needed. */ + if ( pair->action == 0 || !pair->action->anyNextStmt() ) + pair->targ->labelNeeded = true; + + /* Need labels for states that have goto or calls in action code + * invoked on characters (ie, not from out action code). */ + if ( pair->action != 0 ) { + /* Loop the actions. */ + for ( GenActionTable::Iter act = pair->action->key; act.lte(); act++ ) { + /* Get the action and walk its tree. */ + setLabelsNeeded( act->value->inlineList ); + } + } +} + +/* Set up labelNeeded flag for each state. */ +void AsmCodeGen::setLabelsNeeded() +{ + /* If we use the _again label, then we need the _again switch, which uses all + * labels. */ + if ( useAgainLabel() ) { + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = true; + } + else { + /* Do not use all labels by default, init all labelNeeded vars to false. 
*/ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = false; + + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + if ( trans->condSpace == 0 ) + setLabelsNeeded( &trans->p ); + } + + for ( CondApSet::Iter cond = redFsm->condSet; cond.lte(); cond++ ) + setLabelsNeeded( &cond->p ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofAction != 0 ) { + for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) + setLabelsNeeded( item->value->inlineList ); + } + } + } + + if ( !noEnd ) { + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->outNeeded = st->labelNeeded; + } +} + +void AsmCodeGen::writeData() +{ + STATE_IDS(); + + long long maxSpan = keyOps->span( redFsm->lowKey, redFsm->highKey ); + + out << + " .type " << LABEL( "char_class" ) << ", @object\n" << + LABEL( "char_class" ) << ":\n"; + + for ( long long pos = 0; pos < maxSpan; pos++ ) { + out << + " .byte " << redFsm->classMap[pos] << "\n"; + } + +#ifdef LOG_TRANS + out << + LABEL( "fmt_log_trans" ) << ":\n" + " .string \"%i %i %i\\n\"\n"; +#endif +} + +void AsmCodeGen::setNfaIds() +{ + long nextId = 1; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) { + targ->id = nextId; + nextId += 1; + } + } + } +} + +void AsmCodeGen::writeExec() +{ + /* Must set labels immediately before writing because we may depend on the + * noend write option. */ + setLabelsNeeded(); + testEofUsed = false; + outLabelUsed = false; + + setNfaIds(); + + /* If there are eof actions then we need to run code after exporting the + * final state to vCS. Since the interface register is calee-save, we need + * it to live on the stack. */ + stackCS = redFsm->anyEofActivity(); + + /* + * This code needs 88 bytes of stack (offset 0 from %rbp). 
+ * + * cv : %r9 -- caller-save, used internally, condition char, undefined in + * conditions and actions, can use + * + * pc : %r10b -- caller-save, used internally, undefined in conditions + * actions, can use + * + * cs : %r11 -- caller-save, written by write init, read and + * written by exec, undefined in conditions and actions + * + * p : %r12 -- callee-save, interface, persistent + * + * pe : %r13 -- callee-save, interface, persistent + * + * eof: -8(%rbp) + * + * ts: -16(%rbp) + * + * te: -24(%rbp) + * + * act: -32(%rbp) + * + * _nbreak: -40(%rbp) + * + * stackCS: -48(%rbp) + * + * stack: -56(%rbp) + * top: -64(%rbp) + * + * _ps: -72(%rbp) + * + * nfa_stack -80(%rbp) + * nfa_top -88(%rbp) + * nfa_sz -96(%rbp) + */ + + if ( redFsm->anyRegCurStateRef() ) { + out << + " movq $0, -72(%rbp)\n"; + } + + if ( stackCS ) { + /* Only need a persistent cs in the case of eof actions when exiting the + * block. Where CS lives is a matter of performance though, so we should + * only do this if necessary. */ + out << + " movq %r11, " << vCS() << "\n"; + } + + if ( useAgainLabel() ) { + out << + " jmp " << LABEL( "resume" ) << "\n" + << LABEL( "again" ) << ":\n"; + + AGAIN_CASES(); + } + + if ( useAgainLabel() || redFsm->anyNfaStates() ) + out << LABEL( "resume" ) << ":\n"; + + /* Jump into the machine based on the current state. 
*/ + out << + " leaq " << LABEL( "entry_jmp" ) << "(%rip), %rcx\n"; + + if ( stackCS ) { + out << + " movq " << vCS() << ", %r11\n"; + } + + ENTRY_CASES(); + + STATE_GOTOS(); + + EXIT_STATES(); + + out << LABEL( "pop" ) << ":\n"; + + if ( redFsm->anyNfaStates() ) { + out << + " movq " << NFA_TOP() << ", %rcx\n" + " cmpq $0, %rcx\n" + " je " << LABEL( "nfa_stack_empty" ) << "\n" + " movq " << NFA_TOP() << ", %rcx\n" + " subq $1, %rcx\n" + " movq %rcx, " << NFA_TOP() << "\n" + " movq " << NFA_STACK() << ", %rax\n" + " imulq $24, %rcx\n" + " movq 0(%rax,%rcx,), %r11\n" + " movq 8(%rax,%rcx,), " << P() << "\n" + " movq %r11, " << vCS() << "\n" + ; + + if ( redFsm->bAnyNfaPops ) { + out << + " movq %r11, %r14\n" + " movq 16(%rax,%rcx,), %rax\n"; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) { + + /* Write the entry label. */ + out << + " # pop action select\n" + " cmp $" << targ->id << ", %rax\n" + " jne 100f\n"; + + if ( targ->popTest != 0 ) { + /* Write each action in the list of action items. 
*/ + for ( GenActionTable::Iter item = targ->popTest->key; item.lte(); item++ ) + NFA_CONDITION( out, item->value, item.last() ); + } + + out << + " jmp 101f\n" + "100:\n"; + + } + } + } + + out << + "101:\n" + " movq %r14, %r11\n"; + } + + out << + " jmp " << LABEL( "resume" ) << "\n" << + LABEL( "pop_fail" ) << ":\n" + " movq $" << ERROR_STATE() << ", " << vCS() << "\n" + " jmp " << LABEL( "resume" ) << "\n" << + LABEL( "nfa_stack_empty" ) << ":\n"; + } + + if ( stackCS ) { + out << + " movq " << vCS() << ", %r11\n"; + } + + out << + "# WRITE EXEC END\n"; + + out << LABEL( "out" ) << ":\n"; + + if ( stackCS ) { + out << + " movq " << vCS() << ", %r11\n"; + } + +#ifdef LOG_TRANS + out << + " jmp " << LABEL( "skip" ) << "\n" << + LABEL( "log_trans" ) << ":\n" + " movq %rdx, %rcx\n" + " movq %rsi, %rdx\n" + " movq %rdi, %rsi\n" + " movq " << LABEL( "fmt_log_trans" ) << "@GOTPCREL(%rip), %rdi\n" + " movq $0, %rax\n" + " call printf@PLT\n" + " ret\n" << + LABEL( "skip" ) << ":\n" + "\n"; +#endif +} diff --git a/ragel/asm.h b/ragel/asm.h new file mode 100644 index 00000000..a9f91e9a --- /dev/null +++ b/ragel/asm.h @@ -0,0 +1,248 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _ASM_H +#define _ASM_H + +#include <iostream> +#include <string> +#include <iomanip> +#include <stdio.h> + +#include "common.h" +#include "gendata.h" +#include "ragel.h" + +using std::string; +using std::ostream; + +/* Integer array line length. */ +#define IALL_INTEGRAL 8 +#define IALL_STRING 128 + +/* Forwards. */ +struct RedFsmAp; +struct RedStateAp; +struct CodeGenData; +struct GenAction; +struct NameInst; +struct GenInlineItem; +struct GenInlineList; +struct RedAction; +struct LongestMatch; +struct LongestMatchPart; +struct AsmCodeGen; +struct RedTransAp; +struct RedStateAp; +struct GenStateCond; + +string itoa( int i ); + +/* + * class AsmCodeGen + */ +class AsmCodeGen : public CodeGenData +{ +public: + AsmCodeGen( const CodeGenArgs &args ); + virtual ~AsmCodeGen() {} + + virtual void writeInit(); + virtual void writeStart(); + virtual void writeFirstFinal(); + virtual void writeError(); + + virtual void statsSummary() {} + virtual void genAnalysis(); + +protected: + string FSM_NAME(); + string START_STATE_ID(); + string KEY( Key key ); + string COND_KEY( CondKey key ); + string LDIR_PATH( char *path ); + virtual void ACTION( ostream &ret, GenAction *action, int targState, + bool inFinish, bool csForced ); + void CONDITION( ostream &ret, GenAction *condition ); + void NFA_CONDITION( ostream &ret, GenAction *condition, bool last ); + string ALPH_TYPE(); + + bool isAlphTypeSigned(); + + string GET_KEY(); + + string P(); + string PE(); + string vEOF(); + string 
NBREAK(); + + string ACCESS(); + string vCS(); + string STACK(); + string TOP(); + string TOKSTART(); + string TOKEND(); + string ACT(); + + string NFA_STACK(); + string NFA_TOP(); + string NFA_SZ(); + + string DATA_PREFIX(); + string PM() { return "_" + DATA_PREFIX() + "partition_map"; } + string C() { return "_" + DATA_PREFIX() + "cond_spaces"; } + string CK() { return "_" + DATA_PREFIX() + "cond_keys"; } + string K() { return "_" + DATA_PREFIX() + "trans_keys"; } + string I() { return "_" + DATA_PREFIX() + "indicies"; } + string CO() { return "_" + DATA_PREFIX() + "cond_offsets"; } + string KO() { return "_" + DATA_PREFIX() + "key_offsets"; } + string IO() { return "_" + DATA_PREFIX() + "index_offsets"; } + string CL() { return "_" + DATA_PREFIX() + "cond_lengths"; } + string SL() { return "_" + DATA_PREFIX() + "single_lengths"; } + string RL() { return "_" + DATA_PREFIX() + "range_lengths"; } + string A() { return "_" + DATA_PREFIX() + "actions"; } + string TA() { return "_" + DATA_PREFIX() + "trans_actions"; } + string TT() { return "_" + DATA_PREFIX() + "trans_targs"; } + string TSA() { return "_" + DATA_PREFIX() + "to_state_actions"; } + string FSA() { return "_" + DATA_PREFIX() + "from_state_actions"; } + string EA() { return "_" + DATA_PREFIX() + "eof_actions"; } + string ET() { return "_" + DATA_PREFIX() + "eof_trans"; } + string SP() { return "_" + DATA_PREFIX() + "key_spans"; } + string CSP() { return "_" + DATA_PREFIX() + "cond_key_spans"; } + string START() { return DATA_PREFIX() + "start"; } + string ERROR() { return DATA_PREFIX() + "error"; } + string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; } + string CTXDATA() { return DATA_PREFIX() + "ctxdata"; } + + string LABEL( const char *type, long i ); + string LABEL( const char *name ); + + void INLINE_LIST( ostream &ret, GenInlineList *inlineList, + int targState, bool inFinish, bool csForced ); + void EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ); + void LM_SWITCH( 
ostream &ret, GenInlineItem *item, int targState, + int inFinish, bool csForced ); + void SET_ACT( ostream &ret, GenInlineItem *item ); + void INIT_TOKSTART( ostream &ret, GenInlineItem *item ); + void INIT_ACT( ostream &ret, GenInlineItem *item ); + void SET_TOKSTART( ostream &ret, GenInlineItem *item ); + void SET_TOKEND( ostream &ret, GenInlineItem *item ); + void GET_TOKEND( ostream &ret, GenInlineItem *item ); + void STATIC_CONST_INT( const string &name, const string &val ); + void STATE_IDS(); + + string ERROR_STATE(); + string FIRST_FINAL_STATE(); + + bool outLabelUsed; + bool testEofUsed; + bool againLabelUsed; + long nextLmSwitchLabel; + bool stackCS; + + void NBREAK( ostream &ret, int targState, bool csForced ); + void NCALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void NRET( ostream &ret, bool inFinish ); + + void HOST_STMT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ); + void HOST_EXPR( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ); + void HOST_TEXT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ); + void GEN_STMT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ); + void GEN_EXPR( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ); + +public: + + virtual string NULL_ITEM(); + virtual string POINTER(); + virtual ostream &SWITCH_DEFAULT(); + virtual ostream &OPEN_ARRAY( string type, string name ); + virtual ostream &CLOSE_ARRAY(); + virtual ostream &STATIC_VAR( string type, string name ); + virtual string ARR_OFF( string ptr, string offset ); + virtual string CAST( string type ); + virtual string UINT(); + virtual string PTR_CONST(); + virtual string PTR_CONST_END(); + virtual string CTRL_FLOW(); + + virtual void writeExports(); + + unsigned int TO_STATE_ACTION( RedStateAp 
*state ); + unsigned int FROM_STATE_ACTION( RedStateAp *state ); + unsigned int EOF_ACTION( RedStateAp *state ); + + void COND_TRANSLATE( GenStateCond *stateCond ); + void STATE_CONDS( RedStateAp *state, bool genDefault ); + + std::ostream &EXIT_STATES(); + std::string TRANS_GOTO_TARG( RedTransAp *trans ); + std::string TRANS_GOTO_TARG( RedCondPair *pair ); + std::ostream &TRANS_GOTO( RedTransAp *trans ); + std::ostream &AGAIN_CASES(); + std::ostream &FINISH_CASES(); + std::ostream &ENTRY_CASES(); + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NEXT( ostream &ret, int nextDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void RET( ostream &ret, bool inFinish ); + void CURS( ostream &ret, bool inFinish ); + void TARGS( ostream &ret, bool inFinish, int targState ); + void BREAK( ostream &ret, int targState, bool csForced ); + void LM_EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ); + + virtual void writeData(); + virtual void writeExec(); + + bool useAgainLabel(); + + void NFA_PUSH( RedStateAp *state ); + bool IN_TRANS_ACTIONS( RedStateAp *state ); + void STATE_GOTOS(); + + void emitSingleIfElseIf( RedStateAp *state ); + void emitSingleJumpTable( RedStateAp *state, std::string def ); + void emitRangeBSearch( RedStateAp *state, int low, int high ); + void emitCharClassIfElseIf( RedStateAp *state ); + void emitCharClassJumpTable( RedStateAp *state, std::string def ); + + /* Set up labelNeeded flag for each state. 
*/ + void setLabelsNeeded( RedCondPair *pair ); + void setLabelsNeeded( GenInlineList *inlineList ); + void setLabelsNeeded(); + + void setNfaIds(); + + void genOutputLineDirective( ostream &out ) {} + void genLineDirective( ostream &out, int line, const char *file ) {} +}; + +#endif diff --git a/ragel/binary.cc b/ragel/binary.cc new file mode 100644 index 00000000..95aaa803 --- /dev/null +++ b/ragel/binary.cc @@ -0,0 +1,819 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "binary.h" +#include "redfsm.h" +#include "gendata.h" + +#include <assert.h> + +void Binary::genAnalysis() +{ + redFsm->sortByStateId(); + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Choose the singles. 
*/ + redFsm->moveSelectTransToSingle(); + + if ( redFsm->errState != 0 ) + redFsm->getErrorCond(); + + /* If any errors have occurred in the input file then don't write anything. */ + if ( red->id->errorCount > 0 ) + return; + + /* Analyze Machine will find the final action reference counts, among other + * things. We will use these in reporting the usage of fsm directives in + * action code. */ + red->analyzeMachine(); + + setKeyType(); + + /* Run the analysis pass over the table data. */ + setTableState( TableArray::AnalyzePass ); + tableDataPass(); + + /* Switch the tables over to the code gen mode. */ + setTableState( TableArray::GeneratePass ); +} + + +void Binary::tableDataPass() +{ + if ( type == Loop ) + taActions(); + + taKeyOffsets(); + taSingleLens(); + taRangeLens(); + taIndexOffsets(); + taIndicies(); + + taTransCondSpacesWi(); + taTransOffsetsWi(); + taTransLengthsWi(); + + taTransCondSpaces(); + taTransOffsets(); + taTransLengths(); + + taCondTargs(); + taCondActions(); + + taToStateActions(); + taFromStateActions(); + taEofActions(); + taEofConds(); + taEofTrans(); + + taKeys(); + taCondKeys(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); +} + +void Binary::writeData() +{ + if ( type == Loop ) { + /* If there are any transition functions then output the array. If there + * are none, don't bother emitting an empty array that won't be used. 
*/ + if ( redFsm->anyActions() ) + taActions(); + } + + taKeyOffsets(); + taKeys(); + taSingleLens(); + taRangeLens(); + taIndexOffsets(); + + taTransCondSpaces(); + taTransOffsets(); + taTransLengths(); + + taCondKeys(); + taCondTargs(); + taCondActions(); + + if ( redFsm->anyToStateActions() ) + taToStateActions(); + + if ( redFsm->anyFromStateActions() ) + taFromStateActions(); + + if ( redFsm->anyEofActions() ) + taEofActions(); + + taEofConds(); + + if ( redFsm->anyEofTrans() ) + taEofTrans(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); + + STATE_IDS(); +} + + +void Binary::setKeyType() +{ + transKeys.setType( ALPH_TYPE(), alphType->size, alphType->isChar ); + transKeys.isSigned = keyOps->isSigned; +} + +void Binary::setTableState( TableArray::State state ) +{ + for ( ArrayVector::Iter i = arrayVector; i.lte(); i++ ) { + TableArray *tableArray = *i; + tableArray->setState( state ); + } +} + +void Binary::taKeyOffsets() +{ + keyOffsets.start(); + + int curKeyOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + keyOffsets.value( curKeyOffset ); + curKeyOffset += st->outSingle.length() + st->outRange.length()*2; + } + + keyOffsets.finish(); +} + + +void Binary::taSingleLens() +{ + singleLens.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + singleLens.value( st->outSingle.length() ); + + singleLens.finish(); +} + + +void Binary::taRangeLens() +{ + rangeLens.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + rangeLens.value( st->outRange.length() ); + + rangeLens.finish(); +} + +void Binary::taIndexOffsets() +{ + indexOffsets.start(); + + int curIndOffset = 0; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the index offset. */ + indexOffsets.value( curIndOffset ); + + /* Move the index offset ahead. 
*/ + curIndOffset += st->outSingle.length() + st->outRange.length(); + if ( st->defTrans != 0 ) + curIndOffset += 1; + } + + indexOffsets.finish(); +} + +void Binary::taToStateActions() +{ + toStateActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + TO_STATE_ACTION(st); + + toStateActions.finish(); +} + +void Binary::taFromStateActions() +{ + fromStateActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + FROM_STATE_ACTION(st); + + fromStateActions.finish(); +} + +void Binary::taEofActions() +{ + eofActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + EOF_ACTION( st ); + + eofActions.finish(); +} + +void Binary::taEofConds() +{ + /* + * EOF Cond Spaces + */ + eofCondSpaces.start(); + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->outCondSpace != 0 ) + eofCondSpaces.value( st->outCondSpace->condSpaceId ); + else + eofCondSpaces.value( -1 ); + } + eofCondSpaces.finish(); + + /* + * EOF Cond Key Indixes + */ + eofCondKeyOffs.start(); + + int curOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long off = 0; + if ( st->outCondSpace != 0 ) { + off = curOffset; + curOffset += st->outCondKeys.length(); + } + eofCondKeyOffs.value( off ); + } + + eofCondKeyOffs.finish(); + + /* + * EOF Cond Key Lengths. 
+ */ + eofCondKeyLens.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long len = 0; + if ( st->outCondSpace != 0 ) + len = st->outCondKeys.length(); + eofCondKeyLens.value( len ); + } + + eofCondKeyLens.finish(); + + /* + * EOF Cond Keys + */ + eofCondKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->outCondSpace != 0 ) { + for ( int c = 0; c < st->outCondKeys.length(); c++ ) { + CondKey key = st->outCondKeys[c]; + eofCondKeys.value( key.getVal() ); + } + } + } + + eofCondKeys.finish(); +} + +void Binary::taEofTrans() +{ + eofTrans.start(); + + /* Need to compute transition positions. */ + int totalTrans = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + totalTrans += st->outSingle.length(); + totalTrans += st->outRange.length(); + if ( st->defTrans != 0 ) + totalTrans += 1; + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long trans = 0; + if ( st->eofTrans != 0 ) { + trans = totalTrans + 1; + totalTrans += 1; + } + + eofTrans.value( trans ); + } + + eofTrans.finish(); +} + +void Binary::taKeys() +{ + transKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Loop the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + transKeys.value( stel->lowKey.getVal() ); + } + + /* Loop the state's transitions. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + /* Lower key. */ + transKeys.value( rtel->lowKey.getVal() ); + + /* Upper key. */ + transKeys.value( rtel->highKey.getVal() ); + } + } + + transKeys.finish(); +} + +void Binary::taIndicies() +{ + indicies.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) + indicies.value( stel->value->id ); + + /* Walk the ranges. 
*/ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) + indicies.value( rtel->value->id ); + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) + indicies.value( st->defTrans->id ); + } + + indicies.finish(); +} + +void Binary::taTransCondSpaces() +{ + transCondSpaces.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + } + + transCondSpaces.finish(); +} + +void Binary::taTransOffsets() +{ + transOffsets.start(); + + int curOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + transOffsets.value( curOffset ); + curOffset += trans->numConds(); + } + + /* Walk the ranges. 
*/ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + transOffsets.value( curOffset ); + curOffset += trans->numConds(); + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + transOffsets.value( curOffset ); + curOffset += trans->numConds(); + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + transOffsets.value( curOffset ); + curOffset += trans->numConds(); + } + } + + errCondOffset = curOffset; + + transOffsets.finish(); +} + +void Binary::taTransLengths() +{ + transLengths.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + transLengths.value( trans->numConds() ); + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + transLengths.value( trans->numConds() ); + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + transLengths.value( trans->numConds() ); + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + transLengths.value( trans->numConds() ); + } + } + + transLengths.finish(); +} + +void Binary::taTransCondSpacesWi() +{ + transCondSpacesWi.start(); + + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + /* Cond Space id. 
*/ + if ( trans->condSpace != 0 ) + transCondSpacesWi.value( trans->condSpace->condSpaceId ); + else + transCondSpacesWi.value( -1 ); + } + + transCondSpacesWi.finish(); +} + +void Binary::taTransOffsetsWi() +{ + transOffsetsWi.start(); + + int curOffset = 0; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + transOffsetsWi.value( curOffset ); + + TransApSet::Iter next = trans; + next.increment(); + + curOffset += trans->numConds(); + } + + transOffsetsWi.finish(); +} + +void Binary::taTransLengthsWi() +{ + transLengthsWi.start(); + + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + transLengthsWi.value( trans->numConds() ); + + TransApSet::Iter next = trans; + next.increment(); + } + + transLengthsWi.finish(); +} + +void Binary::taCondKeys() +{ + condKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + condKeys.value( key.getVal() ); + } + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + condKeys.value( key.getVal() ); + } + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + condKeys.value( key.getVal() ); + } + } + } + + /* Add any eof transitions that have not yet been written out above. 
*/ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + condKeys.value( key.getVal() ); + } + } + } + + condKeys.finish(); +} + +void Binary::taCondTargs() +{ + condTargs.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + condTargs.value( cond->targ->id ); + } + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + condTargs.value( cond->targ->id ); + } + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + condTargs.value( cond->targ->id ); + } + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + condTargs.value( cond->targ->id ); + } + } + } + + if ( redFsm->errCond != 0 ) { + RedCondPair *cond = &redFsm->errCond->p; + condTargs.value( cond->targ->id ); + } + + condTargs.finish(); +} + +void Binary::taCondActions() +{ + condActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. 
*/ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + COND_ACTION( cond ); + } + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + COND_ACTION( cond ); + } + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + COND_ACTION( cond ); + } + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + COND_ACTION( cond ); + } + } + } + + if ( redFsm->errCond != 0 ) { + RedCondPair *cond = &redFsm->errCond->p; + COND_ACTION( cond ); + } + + condActions.finish(); +} + +void Binary::taNfaTargs() +{ + nfaTargs.start(); + + /* Offset of zero means no NFA targs, put a filler there. */ + nfaTargs.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaTargs.value( st->nfaTargs->length() ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + nfaTargs.value( targ->state->id ); + } + } + + nfaTargs.finish(); +} + +/* These need to mirror nfa targs. 
*/ +void Binary::taNfaPushActions() +{ + nfaPushActions.start(); + + nfaPushActions.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaPushActions.value( 0 ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_PUSH_ACTION( targ ); + } + } + + nfaPushActions.finish(); +} + +void Binary::taNfaPopTrans() +{ + nfaPopTrans.start(); + + nfaPopTrans.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + + nfaPopTrans.value( 0 ); + + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_POP_TEST( targ ); + } + } + + nfaPopTrans.finish(); +} + +void Binary::taNfaOffsets() +{ + nfaOffsets.start(); + + /* Offset of zero means no NFA targs, real targs start at 1. */ + long offset = 1; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs == 0 ) { + nfaOffsets.value( 0 ); + } + else { + nfaOffsets.value( offset ); + offset += 1 + st->nfaTargs->length(); + } + } + + nfaOffsets.finish(); +} + + +/* Write out the array of actions. */ +std::ostream &Binary::ACTIONS_ARRAY() +{ + out << "\t0, "; + int totalActions = 1; + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. */ + out << act->key.length() << ", "; + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) { + out << item->value->actionId; + if ( ! (act.last() && item.last()) ) + out << ", "; + + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +void Binary::taActions() +{ + actions.start(); + + /* Put "no-action" at the beginning. 
*/ + actions.value( 0 ); + + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. */ + actions.value( act->key.length() ); + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) + actions.value( item->value->actionId ); + } + + actions.finish(); +} + + + + diff --git a/ragel/binary.h b/ragel/binary.h new file mode 100644 index 00000000..8b9fc69a --- /dev/null +++ b/ragel/binary.h @@ -0,0 +1,98 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _C_BINARY_H +#define _C_BINARY_H + +#include <iostream> +#include "codegen.h" +#include "tables.h" + +/* Forwards. 
*/ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; + +class Binary + : public virtual Tables +{ +protected: + enum Type { + Loop = 1, Exp + }; + +public: + Binary( const CodeGenArgs &args, Type type ) + : + Tables( args ), + type(type) + {} + +protected: + Type type; + + std::ostream &COND_KEYS_v1(); + std::ostream &COND_SPACES_v1(); + std::ostream &INDICIES(); + std::ostream &INDEX_OFFSETS(); + std::ostream &SINGLE_LENS(); + std::ostream &RANGE_LENS(); + std::ostream &TRANS_TARGS_WI(); + std::ostream &ACTIONS_ARRAY(); + + void taKeyOffsets(); + void taSingleLens(); + void taRangeLens(); + void taIndexOffsets(); + void taIndicies(); + void taTransCondSpacesWi(); + void taTransOffsetsWi(); + void taTransLengthsWi(); + void taTransCondSpaces(); + void taTransOffsets(); + void taTransLengths(); + void taCondTargs(); + void taCondActions(); + void taToStateActions(); + void taFromStateActions(); + void taEofTrans(); + void taEofConds(); + void taEofActions(); + void taKeys(); + void taActions(); + void taCondKeys(); + void taNfaTargs(); + void taNfaOffsets(); + void taNfaPushActions(); + void taNfaPopTrans(); + + void setKeyType(); + + void setTableState( TableArray::State ); + + virtual void writeData(); + virtual void tableDataPass(); + virtual void genAnalysis(); +}; + +#endif diff --git a/ragel/binbreak.cc b/ragel/binbreak.cc new file mode 100644 index 00000000..1141408a --- /dev/null +++ b/ragel/binbreak.cc @@ -0,0 +1,132 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "binbreak.h" + +void BinBreak::LOCATE_TRANS() +{ + out << + " " << keys << " = " << OFFSET( ARR_REF( transKeys ), ARR_REF( keyOffsets ) + "[" + vCS() + "]" ) << ";\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexOffsets ) << "[" << vCS() << "];\n" + "\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( singleLens ) << "[" << vCS() << "];\n" + " " << have << " = 0;\n" + " if ( " << klen << " > 0 ) {\n" + " " << INDEX( ALPH_TYPE(), "_lower" ) << " = " << keys << ";\n" + " " << INDEX( ALPH_TYPE(), "_upper" ) << " = " << keys << " + " << klen << " - 1;\n" + " " << INDEX( ALPH_TYPE(), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << keys << " += " << klen << ";\n" + " " << trans << " += " << CAST( UINT() ) << "" << klen << ";\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << GET_KEY() << " < " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << GET_KEY() << " > " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << have << " = 1;\n" + " " << trans << " += " << CAST( UINT() ) << "(_mid - " << keys << ");\n" + " break;\n" + " }\n" + " }\n" + " }\n" + "\n" + " " << klen << " = " << CAST("int") << ARR_REF( rangeLens ) << "[" << vCS() 
<< "];\n" + " if ( " << have << " == 0 && " << klen << " > 0 ) {\n" + " " << INDEX( ALPH_TYPE(), "_lower" ) << " = " << keys << ";\n" + " " << INDEX( ALPH_TYPE(), "_upper" ) << " = " << keys << " + (" << klen << "<<1) - 2;\n" + " " << INDEX( ALPH_TYPE(), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << trans << " += " << CAST( UINT() ) << "" << klen << ";\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n" + " if ( " << GET_KEY() << " < " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _upper = _mid - 2;\n" + " else if ( " << GET_KEY() << " > " << DEREF( ARR_REF( transKeys ), "_mid + 1" ) << " )\n" + " _lower = _mid + 2;\n" + " else {\n" + " " << trans << " += " << CAST( UINT() ) << "((_mid - " << keys << ")>>1);\n" + " break;\n" + " }\n" + " }\n" + " }\n" + "\n"; +} + +void BinBreak::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + std::stringstream success, error; + + out << + " " << ckeys << " = " << OFFSET( ARR_REF( condKeys ), ARR_REF( transOffsets ) + "[" + string(trans) + "]" ) << ";\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( transLengths ) << "[" << trans << "];\n" + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + if ( red->condSpaceList.length() > 0 ) + COND_EXEC( ARR_REF( transCondSpaces ) + "[" + string(trans) + "]" ); + + success << + cond << " += " << CAST( UINT() ) << "(_mid - " << ckeys << ");\n"; + + error << + cond << " = " << errCondOffset << ";\n"; + + out << + " {\n" + " " << INDEX( ARR_TYPE( condKeys ), "_lower" ) << " = " << ckeys << ";\n" + " " << INDEX( ARR_TYPE( condKeys ), "_upper" ) << " = " << ckeys << " + " << klen << " - 1;\n" + " " << INDEX( ARR_TYPE( condKeys ), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << error.str() << "\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + 
((_upper-_lower) >> 1);\n" + " if ( " << cpc << " < " << CAST("int") << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << cpc << " > " << CAST( "int" ) << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << success.str() << "\n" + " break;\n" + " }\n" + " }\n" + " }\n" + ; + } + + out << EMIT_LABEL( _match_cond ); +} + diff --git a/ragel/binbreak.h b/ragel/binbreak.h new file mode 100644 index 00000000..1b48ab24 --- /dev/null +++ b/ragel/binbreak.h @@ -0,0 +1,71 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAGEL_BINBREAK_H +#define RAGEL_BINBREAK_H + +#include "binary.h" +#include "actloop.h" +#include "actexp.h" + +struct BinBreak +: + public Binary, public TabBreak +{ + BinBreak( const CodeGenArgs &args, Binary::Type type ) + : + Tables( args ), + Binary( args, type ), + TabBreak( args ) + {} + + void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class BinBreakLoop + : public BinBreak, public ActLoop +{ +public: + BinBreakLoop( const CodeGenArgs &args ) + : + Tables( args ), + BinBreak( args, Loop ), + ActLoop( args ) + {} +}; + + +class BinBreakExp + : public BinBreak, public ActExp +{ +public: + BinBreakExp( const CodeGenArgs &args ) + : + Tables( args ), + BinBreak( args, Exp ), + ActExp( args ) + {} +}; + + +#endif diff --git a/ragel/bingoto.cc b/ragel/bingoto.cc new file mode 100644 index 00000000..14cab678 --- /dev/null +++ b/ragel/bingoto.cc @@ -0,0 +1,131 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "bingoto.h" + +void BinGoto::LOCATE_TRANS() +{ + out << + " " << keys << " = " << OFFSET( ARR_REF( transKeys ), ARR_REF( keyOffsets ) + "[" + vCS() + "]" ) << ";\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexOffsets ) << "[" << vCS() << "];\n" + "\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( singleLens ) << "[" << vCS() << "];\n" + " if ( " << klen << " > 0 ) {\n" + " " << INDEX( ALPH_TYPE(), "_lower" ) << " = " << keys << ";\n" + " " << INDEX( ALPH_TYPE(), "_upper" ) << " = " << keys << " + " << klen << " - 1;\n" + " " << INDEX( ALPH_TYPE(), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << keys << " += " << klen << ";\n" + " " << trans << " += " << CAST( UINT() ) << "" << klen << ";\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << GET_KEY() << " < " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << GET_KEY() << " > " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << trans << " += " << CAST( UINT() ) << "(_mid - " << keys << ");\n" + " goto " << _match << ";\n" + " }\n" + " }\n" + " }\n" + "\n" + " " << klen << " = " << CAST("int") << ARR_REF( rangeLens ) << "[" << vCS() << "];\n" + " if ( " << klen << " > 0 ) {\n" + " " << INDEX( ALPH_TYPE(), "_lower" ) << " = " << keys << ";\n" + " " << INDEX( ALPH_TYPE(), "_upper" ) << " = " << keys << " + (" << klen << "<<1) - 2;\n" + " " << INDEX( ALPH_TYPE(), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << trans << " += " << CAST( UINT() ) << "" << klen << ";\n" + " break;\n" + " }\n" + 
"\n" + " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n" + " if ( " << GET_KEY() << " < " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _upper = _mid - 2;\n" + " else if ( " << GET_KEY() << " > " << DEREF( ARR_REF( transKeys ), "_mid + 1" ) << " )\n" + " _lower = _mid + 2;\n" + " else {\n" + " " << trans << " += " << CAST( UINT() ) << "((_mid - " << keys << ")>>1);\n" + " break;\n" + " }\n" + " }\n" + " }\n" + "\n"; + + out << EMIT_LABEL( _match ); +} + + +void BinGoto::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + std::stringstream success, error; + + out << + " " << ckeys << " = " << OFFSET( ARR_REF( condKeys ), ARR_REF( transOffsets ) + "[" + string(trans) + "]" ) << ";\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( transLengths ) << "[" << trans << "];\n" + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + if ( red->condSpaceList.length() > 0 ) + COND_EXEC( ARR_REF( transCondSpaces ) + "[" + string(trans) + "]" ); + + success << + cond << " += " << CAST( UINT() ) << "(_mid - " << ckeys << ");\n"; + + error << + cond << " = " << errCondOffset << ";\n"; + + out << + " {\n" + " " << INDEX( ARR_TYPE( condKeys ), "_lower" ) << " = " << ckeys << ";\n" + " " << INDEX( ARR_TYPE( condKeys ), "_upper" ) << " = " << ckeys << " + " << klen << " - 1;\n" + " " << INDEX( ARR_TYPE( condKeys ), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << error.str() << "\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << cpc << " < " << CAST("int") << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << cpc << " > " << CAST( "int" ) << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << success.str() << "\n" + " break;\n" + " }\n" + " }\n" + " }\n" + ; + } +} + diff --git a/ragel/bingoto.h 
b/ragel/bingoto.h new file mode 100644 index 00000000..18fa8397 --- /dev/null +++ b/ragel/bingoto.h @@ -0,0 +1,71 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAGEL_BINGOTO_H +#define RAGEL_BINGOTO_H + +#include "binary.h" +#include "actloop.h" +#include "actexp.h" + +struct BinGoto +: + public Binary, public TabGoto +{ + BinGoto( const CodeGenArgs &args, Binary::Type type ) + : + Tables( args ), + Binary( args, type ), + TabGoto( args ) + {} + + void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class BinGotoLoop + : public BinGoto, public ActLoop +{ +public: + BinGotoLoop( const CodeGenArgs &args ) + : + Tables( args ), + BinGoto( args, Loop ), + ActLoop( args ) + {} +}; + + +class BinGotoExp + : public BinGoto, public ActExp +{ +public: + BinGotoExp( const CodeGenArgs &args ) + : + Tables( args ), + BinGoto( args, Exp ), + ActExp( args ) + {} +}; + + +#endif diff --git a/ragel/binvar.cc b/ragel/binvar.cc new file mode 100644 index 00000000..e28fc251 --- /dev/null +++ b/ragel/binvar.cc @@ -0,0 +1,139 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "binvar.h"
+#include "parsedata.h"
+#include "inputdata.h"
+
+
+/* Emit the code that locates the transition for the current input key.
+ *
+ * The emitted text first points `keys` and `trans` at the current state's
+ * entries, then runs two binary searches driven by the `_bsc` flag:
+ *
+ *  1. over the state's single keys (length read from singleLens); a hit sets
+ *     `have` and advances `trans` by the index of the match, a miss skips
+ *     `keys` and `trans` past the single keys;
+ *  2. only when `have` is still 0, over the range keys (length read from
+ *     rangeLens). Ranges are stored as pairs, so the search steps by two and
+ *     compares against `_mid` and `_mid + 1`; a miss advances `trans` past all
+ *     ranges.
+ *
+ * Everything is written to `out` as one chained insertion; adjacent string
+ * literals are concatenated by the compiler. */
+void BinVar::LOCATE_TRANS()
+{
+	out <<
+		" " << keys << " = " << OFFSET( ARR_REF( transKeys ), ARR_REF( keyOffsets ) + "[" + vCS() + "]" ) << ";\n"
+		" " << trans << " = " << CAST(UINT()) << ARR_REF( indexOffsets ) << "[" << vCS() << "];\n"
+		"\n"
+		" " << klen << " = " << CAST( "int" ) << ARR_REF( singleLens ) << "[" << vCS() << "];\n"
+		" " << have << " = 0;\n"
+		" if ( " << klen << " > 0 ) {\n"
+		" " << INDEX( ALPH_TYPE(), "_lower" ) << " = " << keys << ";\n"
+		" " << INDEX( ALPH_TYPE(), "_upper" ) << " = " << keys << " + " << klen << " - 1;\n"
+		" " << INDEX( ALPH_TYPE(), "_mid" ) << ";\n"
+		" _bsc = 1;\n"
+		" while ( _bsc == 1 ) {\n"
+		" if ( _upper < _lower ) {\n"
+		" " << keys << " += " << klen << ";\n"
+		" " << trans << " += " << CAST( UINT() ) << "" << klen << ";\n"
+		" _bsc = 0;\n"
+		" }\n"
+		" else {\n"
+		" _mid = _lower + ((_upper-_lower) >> 1);\n"
+		" if ( " << GET_KEY() << " < " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n"
+		" _upper = _mid - 1;\n"
+		" else if ( " << GET_KEY() << " > " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n"
+		" _lower = _mid + 1;\n"
+		" else {\n"
+		" " << have << " = 1;\n"
+		" " << trans << " += " << CAST( UINT() ) << "(_mid - " << keys << ");\n"
+		" _bsc = 0;\n"
+		" }\n"
+		" }\n"
+		" }\n"
+		" }\n"
+		"\n"
+		" " << klen << " = " << CAST("int") << ARR_REF( rangeLens ) << "[" << vCS() << "];\n"
+		" if ( " << have << " == 0 && " << klen << " > 0 ) {\n"
+		" " << INDEX( ALPH_TYPE(), "_lower" ) << " = " << keys << ";\n"
+		" " << INDEX( ALPH_TYPE(), "_upper" ) << " = " << keys << " + (" << klen << "<<1) - 2;\n"
+		" " << INDEX( ALPH_TYPE(), "_mid" ) << ";\n"
+		" _bsc = 1;\n"
+		" while ( _bsc == 1 ) {\n"
+		" if ( _upper < _lower ) {\n"
+		" " << trans << " += " << CAST( UINT() ) << "" << klen << ";\n"
+		" _bsc = 0;\n"
+		" }\n"
+		" else {\n"
+		" _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n"
+		" if ( " << GET_KEY() << " < " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n"
+		" _upper = _mid - 2;\n"
+		" else if ( " << GET_KEY() << " > " << DEREF( ARR_REF( transKeys ), "_mid + 1" ) << " )\n"
+		" _lower = _mid + 2;\n"
+		" else {\n"
+		" " << trans << " += " << CAST( UINT() ) << "((_mid - " << keys << ")>>1);\n"
+		" _bsc = 0;\n"
+		" }\n"
+		" }\n"
+		" }\n"
+		" }\n"
+		"\n";
+}
+
+/* Emit the code that resolves the condition for the located transition.
+ *
+ * Nothing is emitted when the machine has no condition spaces. Otherwise the
+ * emitted text loads the transition's condition keys, resets `cpc`, lets
+ * COND_EXEC emit the condition evaluation, and then binary-searches the
+ * condition keys for `cpc`. On a hit `cond` is advanced by the index of the
+ * match; on a miss `cond` is set to errCondOffset. */
+void BinVar::LOCATE_COND()
+{
+	if ( red->condSpaceList.length() > 0 ) {
+		/* Hit and miss statements, built separately and spliced into the
+		 * search loop below. */
+		std::stringstream success, error;
+
+		out <<
+			" " << ckeys << " = " << OFFSET( ARR_REF( condKeys ), ARR_REF( transOffsets ) + "[" + string(trans) + "]" ) << ";\n"
+			" " << klen << " = " << CAST( "int" ) << ARR_REF( transLengths ) << "[" << trans << "];\n"
+			" " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n"
+			"\n";
+
+		out <<
+			" " << cpc << " = 0;\n";
+
+		/* NOTE(review): this repeats the enclosing test and is always true
+		 * here. */
+		if ( red->condSpaceList.length() > 0 )
+			COND_EXEC( ARR_REF( transCondSpaces ) + "[" + string(trans) + "]" );
+
+		success <<
+			cond << " += " << CAST( UINT() ) << "(_mid - " << ckeys << ");\n";
+
+		error <<
+			cond << " = " << errCondOffset << ";\n";
+
+		out <<
+			" {\n"
+			" " << INDEX( ARR_TYPE( condKeys ), "_lower" ) << " = " << ckeys << ";\n"
+			" " << INDEX( ARR_TYPE( condKeys ), "_upper" ) << " = " << ckeys << " + " << klen << " - 1;\n"
+			" " << INDEX( ARR_TYPE( condKeys ), "_mid" ) << ";\n"
+			" _bsc = 1;\n"
+			" while ( _bsc == 1 ) {\n"
+			" if ( _upper < _lower ) {\n"
+			" " << error.str() << "\n"
+			" _bsc = 0;\n"
+			" }\n"
+			" else {\n"
+			" _mid = _lower + ((_upper-_lower) >> 1);\n"
+			" if ( " << cpc << " < " << CAST("int") << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n"
+			" _upper = _mid - 1;\n"
+			" else if ( " << cpc << " > " << CAST( "int" ) << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n"
+			" _lower = _mid + 1;\n"
+			" else {\n"
+			" " << success.str() << "\n"
+			" _bsc = 0;\n"
+			" }\n"
+			" }\n"
+			" }\n"
+			" }\n"
+		;
+	}
+}
+
diff --git a/ragel/binvar.h b/ragel/binvar.h
new file mode 100644
index 00000000..cbbcef79
--- /dev/null
+++ b/ragel/binvar.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2014-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RAGEL_BINVAR_H
+#define RAGEL_BINVAR_H
+
+#include "binary.h"
+#include "actloop.h"
+#include "actexp.h"
+
+/* Code generator that combines the Binary base with the TabVar base.
+ *
+ * The constructor forwards the generator arguments to Tables, Binary and
+ * TabVar. Initializing Tables from this class suggests it is a virtual base
+ * shared by the other two -- TODO confirm against binary.h.
+ *
+ * All member functions are only declared here; their definitions are not part
+ * of this header. */
+struct BinVar
+:
+	public Binary, public TabVar
+{
+	BinVar( const CodeGenArgs &args, Binary::Type type )
+	:
+		Tables( args ),
+		Binary( args, type ),
+		TabVar( args )
+	{}
+
+	void VAR_COND_BIN_SEARCH( Variable &var, TableArray &keys, std::string ok, std::string error );
+
+	void LOCATE_TRANS();
+	void LOCATE_COND();
+};
+
+/* BinVar constructed with the Loop type and combined with ActLoop. */
+class BinVarLoop
+	: public BinVar, public ActLoop
+{
+public:
+	BinVarLoop( const CodeGenArgs &args )
+	:
+		Tables( args ),
+		BinVar( args, Loop ),
+		ActLoop( args )
+	{}
+};
+
+/* BinVar constructed with the Exp type and combined with ActExp. */
+class BinVarExp
+:
+	public BinVar, public ActExp
+{
+public:
+	BinVarExp( const CodeGenArgs &args )
+	:
+		Tables( args ),
+		BinVar( args, Exp ),
+		ActExp( args )
+	{}
+};
+
+#endif
diff --git a/ragel/buffer.h b/ragel/buffer.h
new file mode 100644
index 00000000..72bcd5f9
--- /dev/null
+++ b/ragel/buffer.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2003-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _BUFFER_H +#define _BUFFER_H + +#define BUFFER_INITIAL_SIZE 4096 + +/* An automatically grown buffer for collecting tokens. Always reuses space; + * never down resizes. */ +struct Buffer +{ + Buffer() + { + data = (char*) malloc( BUFFER_INITIAL_SIZE ); + allocated = BUFFER_INITIAL_SIZE; + length = 0; + } + ~Buffer() { free(data); } + + void append( char p ) + { + if ( length == allocated ) { + allocated *= 2; + data = (char*) realloc( data, allocated ); + } + data[length++] = p; + } + + void clear() { length = 0; } + + char *data; + int allocated; + int length; +}; + +#endif diff --git a/ragel/codegen.cc b/ragel/codegen.cc new file mode 100644 index 00000000..90ea9642 --- /dev/null +++ b/ragel/codegen.cc @@ -0,0 +1,1201 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "codegen.h" +#include "ragel.h" +#include "redfsm.h" +#include "gendata.h" +#include "inputdata.h" +#include "parsedata.h" +#include <sstream> +#include <string> +#include <assert.h> +#include <iomanip> + + +using std::ostream; +using std::ostringstream; +using std::string; +using std::endl; +using std::istream; +using std::ifstream; +using std::ostream; +using std::ios; +using std::cin; +using std::endl; + +std::ostream &operator<<( std::ostream &out, Variable &v ) +{ + out << v.name; + v.isReferenced = true; + return out; +} + +std::ostream &operator<<( std::ostream &out, GotoLabel &l ) +{ + out << l.name; + l.isReferenced = true; + return out; +} + +TableArray::TableArray( const char *name, CodeGen &codeGen ) +: + state(InitialState), + name(name), + width(0), + isSigned(true), + isChar(false), + stringTables( codeGen.stringTables ), + iall( codeGen.stringTables ? IALL_STRING : IALL_INTEGRAL ), + values(0), + + /* + * Use zero for min and max because + * we we null terminate every array. + */ + min(0), + max(0), + + codeGen(codeGen), + out(codeGen.out), + ln(0), + isReferenced(false), + started(false) +{ + codeGen.arrayVector.append( this ); +} + +std::string TableArray::ref() +{ + isReferenced = true; + return string("_") + codeGen.DATA_PREFIX() + name; +} + +long long TableArray::size() +{ + return width * values; +} + +void TableArray::startAnalyze() +{ +} + +void TableArray::valueAnalyze( long long v ) +{ + values += 1; + if ( v < min ) + min = v; + if ( v > max ) + max = v; +} + +void TableArray::finishAnalyze() +{ + if ( codeGen.backend == Direct ) { + /* Calculate the type if it is not already set. 
*/ + if ( type.empty() ) { + if ( min >= S8BIT_MIN && max <= S8BIT_MAX ) { + type = "signed char"; + width = sizeof(char); + } + else if ( min >= S16BIT_MIN && max <= S16BIT_MAX ) { + type = "short"; + width = sizeof(short); + } + else if ( min >= S32BIT_MIN && max <= S32BIT_MAX ) { + type = "int"; + width = sizeof(int); + } + else if ( min >= S64BIT_MAX && max <= S64BIT_MAX ) { + type = "long"; + width = sizeof(long); + } + else { + type = "long long"; + width = sizeof(long long); + } + } + } + else { + /* Calculate the type if it is not already set. */ + if ( type.empty() ) { + if ( min >= S8BIT_MIN && max <= S8BIT_MAX ) { + type = "s8"; + width = sizeof(char); + } + else if ( min >= S16BIT_MIN && max <= S16BIT_MAX ) { + type = "s16"; + width = sizeof(short); + } + else if ( min >= S32BIT_MIN && max <= S32BIT_MAX ) { + type = "s32"; + width = sizeof(int); + } + else if ( min >= S64BIT_MAX && max <= S64BIT_MAX ) { + type = "s64"; + width = sizeof(long); + } + else { + type = "s128"; + width = sizeof(long long); + } + } + } +} + +void TableArray::startGenerate() +{ + if ( codeGen.backend == Direct ) { + if ( stringTables ) { + out << "static const char S_" << codeGen.DATA_PREFIX() << name << + "[] __attribute__((aligned (16))) = \n\t\""; + } + else { + out << "static const " << type << " " << + "_" << codeGen.DATA_PREFIX() << name << + "[] = {\n\t"; + } + } + else { + out << "array " << type << " " << + "_" << codeGen.DATA_PREFIX() << name << + "( " << min << ", " << max << " ) = { "; + } +} + +void TableArray::stringGenerate( long long value ) +{ + char c; + short h; + int i; + long l; + unsigned char *p = 0; + int n = 0; + switch ( width ) { + case sizeof( char ): + c = value; + p = (unsigned char *)&c; + n = sizeof(char); + break; + case sizeof( short ): + h = value; + p = (unsigned char *)&h; + n = sizeof(short); + break; + case sizeof( int ): + i = value; + p = (unsigned char *)&i; + n = sizeof(int); + break; +#if SIZEOF_INT != SIZEOF_LONG + case sizeof( long 
): + l = value; + p = (unsigned char *)&l; + n = sizeof(long); + break; +#endif + } + + std::ios_base::fmtflags prevFlags = out.flags( std::ios::hex ); + int prevFill = out.fill( '0' ); + + while ( n-- > 0 ) { + out << '\\'; + out << 'x'; + out << std::setw(2) << (unsigned int) *p++; + } + + out.flags( prevFlags ); + out.fill( prevFill ); +} + +void TableArray::valueGenerate( long long v ) +{ + if ( codeGen.backend == Direct ) { + if ( stringTables ) { + stringGenerate( v ); + + if ( ++ln % iall == 0 ) { + out << "\"\n\t\""; + ln = 0; + } + } + else { + if ( isChar ) + out << "c(" << v << ")"; + else if ( !isSigned ) + out << v << "u"; + else + out << v; + + if ( ( ++ln % iall ) == 0 ) { + out << ",\n\t"; + ln = 0; + } + else { + out << ", "; + } + } + } + else { + if ( isChar ) + out << "c(" << v << ")"; + else if ( !isSigned ) + out << "u(" << v << ")"; + else + out << v; + out << ", "; + } +} + +void TableArray::finishGenerate() +{ + if ( codeGen.backend == Direct ) { + if ( stringTables ) { + out << "\";\nconst " << type << " *_" << codeGen.DATA_PREFIX() << name << + " = (const " << type << "*) S_" << codeGen.DATA_PREFIX() << name << ";\n\n"; + + } + else { + if ( isChar ) + out << "c(0)\n};\n\n"; + else if ( !isSigned ) + out << "0u\n};\n\n"; + else + out << "0\n};\n\n"; + } + } + else { + if ( isChar ) + out << "c(0) };\n\n"; + else if ( !isSigned ) + out << "u(0) };\n\n"; + else + out << "0 };\n\n"; + } + + if ( codeGen.red->id->printStatistics ) { + codeGen.red->id->stats() << name << "\t" << values << "\t" << + size() << "\t" << endl; + } + + codeGen.tableData += size(); +} + +void TableArray::start() +{ + assert( !started ); + started = true; + switch ( state ) { + case InitialState: + break; + case AnalyzePass: + startAnalyze(); + break; + case GeneratePass: + if ( isReferenced ) + startGenerate(); + break; + } +} + +void TableArray::value( long long v ) +{ + assert( started ); + switch ( state ) { + case InitialState: + break; + case AnalyzePass: + 
valueAnalyze( v ); + break; + case GeneratePass: + if ( isReferenced ) + valueGenerate( v ); + break; + } +} + +void TableArray::finish() +{ + assert( started ); + started = false; + switch ( state ) { + case InitialState: + break; + case AnalyzePass: + finishAnalyze(); + break; + case GeneratePass: + if ( isReferenced ) + finishGenerate(); + break; + } +} + +/* Init code gen with in parameters. */ +CodeGen::CodeGen( const CodeGenArgs &args ) +: + CodeGenData( args ), + cpc( "_cpc" ), + pop_test( "_pop_test" ), + new_recs( "new_recs" ), + alt( "_alt" ), + tableData( 0 ), + backend( args.id->hostLang->backend ), + stringTables( args.id->stringTables ), + + nfaTargs( "nfa_targs", *this ), + nfaOffsets( "nfa_offsets", *this ), + nfaPushActions( "nfa_push_actions", *this ), + nfaPopTrans( "nfa_pop_trans", *this ) +{ +} + +void CodeGen::statsSummary() +{ + if ( red->id->printStatistics ) + red->id->stats() << "table-data\t\t" << tableData << endl << endl; +} + + +string CodeGen::CAST( string type ) +{ + if ( backend == Direct ) + return "(" + type + ")"; + else + return "cast(" + type + ")"; +} + +/* Write out the fsm name. */ +string CodeGen::FSM_NAME() +{ + return fsmName; +} + +/* Emit the offset of the start state as a decimal integer. 
*/
+string CodeGen::START_STATE_ID()
+{
+	ostringstream id;
+	id << redFsm->startState->id;
+	return id.str();
+}
+
+
+/* Prefix for accessing the machine's state variables: the user supplied
+ * access expression followed by the access operator, or an empty string when
+ * no access expression was given. */
+string CodeGen::ACCESS()
+{
+	if ( red->accessExpr == 0 )
+		return string();
+
+	ostringstream prefix;
+	prefix << OPEN_HOST_PLAIN();
+	INLINE_LIST( prefix, red->accessExpr, 0, false, false );
+	prefix << CLOSE_HOST_PLAIN();
+	prefix << ACCESS_OPER();
+	return prefix.str();
+}
+
+
+/* Expression for the current position: the user supplied one when present,
+ * plain "p" otherwise. */
+string CodeGen::P()
+{
+	if ( red->pExpr == 0 )
+		return "p";
+
+	ostringstream expr;
+	expr << OPEN_HOST_EXPR();
+	INLINE_LIST( expr, red->pExpr, 0, false, false );
+	expr << CLOSE_HOST_EXPR();
+	return expr.str();
+}
+
+/* Expression for the end position: the user supplied one when present, plain
+ * "pe" otherwise. */
+string CodeGen::PE()
+{
+	if ( red->peExpr == 0 )
+		return "pe";
+
+	ostringstream expr;
+	expr << OPEN_HOST_EXPR();
+	INLINE_LIST( expr, red->peExpr, 0, false, false );
+	expr << CLOSE_HOST_EXPR();
+	return expr.str();
+}
+
+/* Expression for the eof marker: the user supplied one when present, plain
+ * "eof" otherwise. */
+string CodeGen::vEOF()
+{
+	if ( red->eofExpr == 0 )
+		return "eof";
+
+	ostringstream expr;
+	expr << OPEN_HOST_EXPR();
+	INLINE_LIST( expr, red->eofExpr, 0, false, false );
+	expr << CLOSE_HOST_EXPR();
+	return expr.str();
+}
+
+/* Expression for the current state: the user supplied one when present, the
+ * access prefix followed by "cs" otherwise. */
+string CodeGen::vCS()
+{
+	if ( red->csExpr == 0 )
+		return ACCESS() + "cs";
+
+	/* Emit the user supplied method of retrieving the key. */
+	ostringstream expr;
+	expr << OPEN_HOST_EXPR();
+	INLINE_LIST( expr, red->csExpr, 0, false, false );
+	expr << CLOSE_HOST_EXPR();
+	return expr.str();
+}
+
+/* Expression for the stack top: the user supplied one when present, the
+ * access prefix followed by "top" otherwise. */
+string CodeGen::TOP()
+{
+	if ( red->topExpr == 0 )
+		return ACCESS() + "top";
+
+	ostringstream expr;
+	expr << OPEN_HOST_EXPR();
+	INLINE_LIST( expr, red->topExpr, 0, false, false );
+	expr << CLOSE_HOST_EXPR();
+	return expr.str();
+}
+
+/* Expression for the stack: the user supplied one when present, the access
+ * prefix followed by "stack" otherwise. */
+string CodeGen::STACK()
+{
+	if ( red->stackExpr == 0 )
+		return ACCESS() + "stack";
+
+	ostringstream expr;
+	expr << OPEN_HOST_EXPR();
+	INLINE_LIST( expr, red->stackExpr, 0, false, false );
+	expr << CLOSE_HOST_EXPR();
+	return expr.str();
+}
+
+/* Expression for the act variable: the user supplied one when present, the
+ * access prefix followed by "act" otherwise. */
+string CodeGen::ACT()
+{
+	if ( red->actExpr == 0 )
+		return ACCESS() + "act";
+
+	ostringstream expr;
+	expr << OPEN_HOST_EXPR();
+	INLINE_LIST( expr, red->actExpr, 0, false, false );
+	expr << CLOSE_HOST_EXPR();
+	return expr.str();
+}
+
+/* Expression for the token start: the user supplied one when present, the
+ * access prefix followed by "ts" otherwise. */
+string CodeGen::TOKSTART()
+{
+	if ( red->tokstartExpr == 0 )
+		return ACCESS() + "ts";
+
+	ostringstream expr;
+	expr << OPEN_HOST_EXPR();
+	INLINE_LIST( expr, red->tokstartExpr, 0, false, false );
+	expr << CLOSE_HOST_EXPR();
+	return expr.str();
+}
+
+/* Expression for the token end: the user supplied one when present, the
+ * access prefix followed by "te" otherwise. */
+string CodeGen::TOKEND()
+{
+	if ( red->tokendExpr == 0 )
+		return ACCESS() + "te";
+
+	ostringstream expr;
+	expr << OPEN_HOST_EXPR();
+	INLINE_LIST( expr, red->tokendExpr, 0, false, false );
+	expr << CLOSE_HOST_EXPR();
+	return expr.str();
+}
+
+/* Expression that yields the current input key. */
+string CodeGen::GET_KEY()
+{
+	ostringstream expr;
+	if ( red->getKeyExpr == 0 ) {
+		/* Expression for retrieving the key, use simple dereference. */
+		expr << "( " << DEREF( "data", P() ) << ")";
+		return expr.str();
+	}
+
+	/* Emit the user supplied method of retrieving the key. */
+	expr << OPEN_HOST_EXPR();
+	INLINE_LIST( expr, red->getKeyExpr, 0, false, false );
+	expr << CLOSE_HOST_EXPR();
+	return expr.str();
+}
+
+/* Write out a key from the fsm code gen. Depends on whether or not the key is
+ * signed.
*/ +string CodeGen::KEY( Key key ) +{ + if ( backend == Direct ) { + ostringstream ret; + if ( alphType->isChar ) + ret << "c(" << (unsigned long) key.getVal() << ")"; + else if ( keyOps->isSigned || !keyOps->explicitUnsigned ) + ret << key.getVal(); + else + ret << (unsigned long) key.getVal() << "u"; + return ret.str(); + } + else { + ostringstream ret; + if ( alphType->isChar ) + ret << "c(" << (unsigned long) key.getVal() << ")"; + else if ( keyOps->isSigned || !keyOps->explicitUnsigned ) + ret << key.getVal(); + else + ret << "u(" << (unsigned long) key.getVal() << ")"; + return ret.str(); + } +} + +bool CodeGen::isAlphTypeSigned() +{ + return keyOps->isSigned; +} + +void CodeGen::DECLARE( std::string type, Variable &var, std::string init ) +{ + if ( var.isReferenced ) + out << type << " " << var.name << init << ";\n"; +} + +void CodeGen::EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ) +{ + /* The parser gives fexec two children. The double brackets are for D + * code. If the inline list is a single word it will get interpreted as a + * C-style cast by the D compiler. */ + ret << OPEN_GEN_BLOCK() << P() << " = (("; + INLINE_LIST( ret, item->children, targState, inFinish, false ); + ret << "))-1;" << CLOSE_GEN_BLOCK() << "\n"; +} + +void CodeGen::LM_SWITCH( ostream &ret, GenInlineItem *item, + int targState, int inFinish, bool csForced ) +{ + ret << + OPEN_GEN_BLOCK() << "switch( " << ACT() << " ) {\n"; + + for ( GenInlineList::Iter lma = *item->children; lma.lte(); lma++ ) { + /* Write the case label, the action and the case break. */ + if ( lma->lmId < 0 ) + ret << " " << DEFAULT() << " {\n"; + else + ret << " " << CASE( STR(lma->lmId) ) << " {\n"; + + /* Write the block and close it off. 
*/ + INLINE_LIST( ret, lma->children, targState, inFinish, csForced ); + + ret << CEND() << "\n}\n"; + } + + ret << + " }" << CLOSE_GEN_BLOCK() << "\n" + "\t"; +} + +void CodeGen::LM_EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ) +{ + /* The parser gives fexec two children. The double brackets are for D + * code. If the inline list is a single word it will get interpreted as a + * C-style cast by the D compiler. This should be in the D code generator. */ + ret << P() << " = (("; + INLINE_LIST( ret, item->children, targState, inFinish, false ); + ret << "))-1;\n"; +} + +void CodeGen::SET_ACT( ostream &ret, GenInlineItem *item ) +{ + ret << ACT() << " = " << item->lmId << ";"; +} + +void CodeGen::SET_TOKEND( ostream &ret, GenInlineItem *item ) +{ + /* The tokend action sets tokend. */ + ret << TOKEND() << " = " << P(); + if ( item->offset != 0 ) + out << "+" << item->offset; + out << ";"; +} + +void CodeGen::GET_TOKEND( ostream &ret, GenInlineItem *item ) +{ + ret << TOKEND(); +} + +void CodeGen::INIT_TOKSTART( ostream &ret, GenInlineItem *item ) +{ + ret << TOKSTART() << " = " << NIL() << ";"; +} + +void CodeGen::INIT_ACT( ostream &ret, GenInlineItem *item ) +{ + ret << ACT() << " = 0;"; +} + +void CodeGen::SET_TOKSTART( ostream &ret, GenInlineItem *item ) +{ + ret << TOKSTART() << " = " << P() << ";"; +} + +void CodeGen::HOST_STMT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + ret << OPEN_HOST_BLOCK( item->loc.fileName, item->loc.line ); + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + ret << CLOSE_HOST_BLOCK(); + } +} + +#if 0 +void CodeGen::LM_CASE( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. 
*/ + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + } +} +#endif + +void CodeGen::HOST_EXPR( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + ret << CLOSE_HOST_EXPR(); + } +} + +void CodeGen::HOST_TEXT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + ret << OPEN_HOST_PLAIN(); + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + ret << CLOSE_HOST_PLAIN(); + } +} + +void CodeGen::GEN_STMT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + ret << OPEN_GEN_BLOCK(); + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + ret << CLOSE_GEN_BLOCK(); + } +} + +void CodeGen::GEN_EXPR( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + ret << OPEN_GEN_EXPR(); + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + ret << CLOSE_GEN_EXPR(); + } +} + +void CodeGen::INLINE_EXPR( ostream &ret, GenInlineList *inlineList ) +{ + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, inlineList, 0, false, false ); + ret << CLOSE_HOST_EXPR(); +} + +void CodeGen::INLINE_BLOCK( ostream &ret, GenInlineExpr *inlineExpr ) +{ + out << OPEN_HOST_BLOCK( inlineExpr ); + INLINE_LIST( out, inlineExpr->inlineList, 0, false, false ); + out << CLOSE_HOST_BLOCK(); +} + +void CodeGen::INLINE_PLAIN( ostream &ret, GenInlineExpr *inlineExpr ) +{ + +} + +/* Write out an inline tree structure. 
Walks the list and possibly calls out
+ * to virtual functions that handle language specific items in the tree.
+ *
+ * Each item is dispatched on its type. Items that only need a fixed piece of
+ * text are written here directly; the rest are forwarded to the matching
+ * member function together with the pass-through arguments they take.
+ *
+ *  ret         stream that receives the generated text
+ *  inlineList  items to write, in order
+ *  targState, inFinish, csForced
+ *              not interpreted here, only passed on to the handlers
+ */
+void CodeGen::INLINE_LIST( ostream &ret, GenInlineList *inlineList,
+		int targState, bool inFinish, bool csForced )
+{
+	for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) {
+		switch ( item->type ) {
+		case GenInlineItem::Text:
+			/* Host text is copied as-is for the Direct backend and run
+			 * through translatedHostData otherwise. */
+			if ( backend == Direct )
+				ret << item->data;
+			else
+				translatedHostData( ret, item->data );
+			break;
+		case GenInlineItem::Goto:
+			GOTO( ret, item->targState->id, inFinish );
+			break;
+		case GenInlineItem::Call:
+			CALL( ret, item->targState->id, targState, inFinish );
+			break;
+		case GenInlineItem::Ncall:
+			NCALL( ret, item->targState->id, targState, inFinish );
+			break;
+		case GenInlineItem::Next:
+			NEXT( ret, item->targState->id, inFinish );
+			break;
+		case GenInlineItem::Ret:
+			RET( ret, inFinish );
+			break;
+		case GenInlineItem::Nret:
+			NRET( ret, inFinish );
+			break;
+		case GenInlineItem::PChar:
+			ret << P();
+			break;
+		case GenInlineItem::Char:
+			ret << OPEN_GEN_EXPR() << GET_KEY() << CLOSE_GEN_EXPR();
+			break;
+		case GenInlineItem::Hold:
+			ret << OPEN_GEN_BLOCK() << P() << " = " << P() << " - 1; " << CLOSE_GEN_BLOCK();
+			break;
+		case GenInlineItem::LmHold:
+			ret << P() << " = " << P() << " - 1;";
+			break;
+		case GenInlineItem::NfaClear:
+			ret << "nfa_len = 0; ";
+			break;
+		case GenInlineItem::Exec:
+			EXEC( ret, item, targState, inFinish );
+			break;
+		case GenInlineItem::Curs:
+			CURS( ret, inFinish );
+			break;
+		case GenInlineItem::Targs:
+			TARGS( ret, inFinish, targState );
+			break;
+		case GenInlineItem::Entry:
+			ret << item->targState->id;
+			break;
+		case GenInlineItem::GotoExpr:
+			GOTO_EXPR( ret, item, inFinish );
+			break;
+		case GenInlineItem::CallExpr:
+			CALL_EXPR( ret, item, targState, inFinish );
+			break;
+		case GenInlineItem::NcallExpr:
+			NCALL_EXPR( ret, item, targState, inFinish );
+			break;
+		case GenInlineItem::NextExpr:
+			NEXT_EXPR( ret, item, inFinish );
+			break;
+		case GenInlineItem::LmSwitch:
+			LM_SWITCH( ret, item, targState, inFinish, csForced );
+			break;
+		case GenInlineItem::LmExec:
+			LM_EXEC( ret, item, targState, inFinish );
+			break;
+		case GenInlineItem::LmCase:
+			/* Not encountered here, in the lm switch. */
+			break;
+		case GenInlineItem::LmSetActId:
+			SET_ACT( ret, item );
+			break;
+		case GenInlineItem::LmSetTokEnd:
+			SET_TOKEND( ret, item );
+			break;
+		case GenInlineItem::LmGetTokEnd:
+			GET_TOKEND( ret, item );
+			break;
+		case GenInlineItem::LmInitTokStart:
+			INIT_TOKSTART( ret, item );
+			break;
+		case GenInlineItem::LmInitAct:
+			INIT_ACT( ret, item );
+			break;
+		case GenInlineItem::LmSetTokStart:
+			SET_TOKSTART( ret, item );
+			break;
+		case GenInlineItem::Break:
+			BREAK( ret, targState, csForced );
+			break;
+		case GenInlineItem::Nbreak:
+			NBREAK( ret, targState, csForced );
+			break;
+		case GenInlineItem::HostStmt:
+			HOST_STMT( ret, item, targState, inFinish, csForced );
+			break;
+		case GenInlineItem::HostExpr:
+			HOST_EXPR( ret, item, targState, inFinish, csForced );
+			break;
+		case GenInlineItem::HostText:
+			HOST_TEXT( ret, item, targState, inFinish, csForced );
+			break;
+		case GenInlineItem::GenStmt:
+			GEN_STMT( ret, item, targState, inFinish, csForced );
+			break;
+		case GenInlineItem::GenExpr:
+			GEN_EXPR( ret, item, targState, inFinish, csForced );
+			break;
+		/* These should not be encountered. We handle these Nfa wraps at the top level. */
+		case GenInlineItem::NfaWrapAction:
+		case GenInlineItem::NfaWrapConds:
+			break;
+		}
+	}
+}
+
+/* Write out paths in line directives. Escapes any special characters.
*/
+string CodeGen::LDIR_PATH( char *path )
+{
+	/* The only character rewritten is the backslash, which is doubled;
+	 * everything else is copied through unchanged. */
+	ostringstream ret;
+	for ( char *pc = path; *pc != 0; pc++ ) {
+		if ( *pc == '\\' )
+			ret << "\\\\";
+		else
+			ret << *pc;
+	}
+	return ret.str();
+}
+
+/* Write an action as a host-language block, preceded by a tab and followed
+ * by a newline and an output line directive. */
+void CodeGen::ACTION( ostream &ret, GenAction *action, IlOpts opts )
+{
+	ret << '\t';
+	ret << OPEN_HOST_BLOCK( action->loc.fileName, action->loc.line );
+	INLINE_LIST( ret, action->inlineList, opts.targState, opts.inFinish, opts.csForced );
+	ret << CLOSE_HOST_BLOCK();
+	ret << "\n";
+	genOutputLineDirective( ret );
+}
+
+/* Write a condition as a host-language expression, followed by a newline and
+ * an output line directive. */
+void CodeGen::CONDITION( ostream &ret, GenAction *condition )
+{
+	ret << OPEN_HOST_EXPR( condition->loc.fileName, condition->loc.line );
+	INLINE_LIST( ret, condition->inlineList, 0, false, false );
+	ret << CLOSE_HOST_EXPR();
+	ret << "\n";
+	genOutputLineDirective( ret );
+}
+
+/* Write one entry of an NFA pop test.
+ *
+ * Three shapes are handled, selected by the condition's inline list:
+ *  - a single NfaWrapAction item: the wrapped action is written as an action;
+ *  - a single NfaWrapConds item: the wrapped condition space is evaluated
+ *    into cpc and pop_test is set to whether cpc equals one of the wrapped
+ *    keys (0 when there are no keys);
+ *  - anything else: pop_test is set to the condition itself.
+ * For the last two, unless this is the last entry, a break on a failed test
+ * follows.
+ *
+ * All text goes to `ret`. The wrapped action and the wrapped conditions used
+ * to be written to `out` while the text around them went to `ret`, which only
+ * produced coherent output when the two were the same stream. */
+void CodeGen::NFA_CONDITION( ostream &ret, GenAction *condition, bool last )
+{
+	if ( condition->inlineList->length() == 1 &&
+			condition->inlineList->head->type ==
+			GenInlineItem::NfaWrapAction )
+	{
+		GenAction *action = condition->inlineList->head->wrappedAction;
+		ACTION( ret, action, IlOpts( 0, false, false ) );
+	}
+	else if ( condition->inlineList->length() == 1 &&
+			condition->inlineList->head->type ==
+			GenInlineItem::NfaWrapConds )
+	{
+		ret <<
+			" " << cpc << " = 0;\n";
+
+		/* Each condition of the space contributes one bit, at the position
+		 * of the condition within the set. */
+		GenCondSpace *condSpace = condition->inlineList->head->condSpace;
+		for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) {
+			ret <<
+				" if ( ";
+			CONDITION( ret, *csi );
+			Size condValOffset = (1 << csi.pos());
+			ret << " ) " << cpc << " += " << condValOffset << ";\n";
+		}
+
+		const CondKeySet &keys = condition->inlineList->head->condKeySet;
+		if ( keys.length() > 0 ) {
+			ret << pop_test << " = ";
+			for ( CondKeySet::Iter cki = keys; cki.lte(); cki++ ) {
+				ret << "" << cpc << " == " << *cki;
+				if ( !cki.last() )
+					ret << " || ";
+			}
+			ret << ";\n";
+		}
+		else {
+			ret << pop_test << " = 0;\n";
+		}
+
+		if ( !last ) {
+			ret <<
+				"if ( !" << pop_test << " )\n"
+				" break;\n";
+		}
+	}
+	else {
+		ret << pop_test << " = ";
+		CONDITION( ret, condition );
+		ret << ";\n";
+
+		if ( !last ) {
+			ret <<
+				"if ( !" << pop_test << " )\n"
+				" break;\n";
+		}
+	}
+}
+
+/* Write the switch that runs the NFA pop tests: pop_test starts at 1 and one
+ * case is written for every action list that has pop test references. */
+void CodeGen::NFA_POP_TEST_EXEC()
+{
+	out <<
+		" " << pop_test << " = 1;\n"
+		" switch ( nfa_bp[nfa_len].popTrans ) {\n";
+
+	/* Loop the actions. */
+	for ( GenActionTableMap::Iter redAct = redFsm->actionMap;
+			redAct.lte(); redAct++ )
+	{
+		if ( redAct->numNfaPopTestRefs > 0 ) {
+			/* Write the entry label. */
+			out << "\t " << CASE( STR( redAct->actListId+1 ) ) << " {\n";
+
+			/* Write each action in the list of action items. */
+			for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ )
+				NFA_CONDITION( out, item->value, item.last() );
+
+			out << CEND() << "\n}\n";
+		}
+	}
+
+	out <<
+		" }\n"
+		"\n";
+}
+
+
+/* Id of the error state as text, or "-1" when the machine has none. */
+string CodeGen::ERROR_STATE()
+{
+	ostringstream ret;
+	if ( redFsm->errState != 0 )
+		ret << redFsm->errState->id;
+	else
+		ret << "-1";
+	return ret.str();
+}
+
+/* Id of the first final state as text, or the next unused state id when the
+ * machine has no first final state. */
+string CodeGen::FIRST_FINAL_STATE()
+{
+	ostringstream ret;
+	if ( redFsm->firstFinState != 0 )
+		ret << redFsm->firstFinState->id;
+	else
+		ret << redFsm->nextStateId;
+	return ret.str();
+}
+
+/* Write the initialization block: the current state (unless disabled), and,
+ * only when the machine needs them, the NFA length, the stack top and the
+ * longest-match variables. */
+void CodeGen::writeInit()
+{
+	out << " {\n";
+
+	if ( !noCS )
+		out << "\t" << vCS() << " = " << CAST("int") << START() << ";\n";
+
+	if ( redFsm->anyNfaStates() )
+		out << "\t" << "nfa_len = 0;\n";
+
+	/* If there are any calls, then the stack top needs initialization. */
+	if ( redFsm->anyActionCalls() || redFsm->anyActionNcalls() ||
+			redFsm->anyActionRets() || redFsm->anyActionNrets() )
+	{
+		out << "\t" << TOP() << " = 0;\n";
+	}
+
+	if ( red->hasLongestMatch ) {
+		out <<
+			" " << TOKSTART() << " = " << NIL() << ";\n"
+			" " << TOKEND() << " = " << NIL() << ";\n";
+
+		if ( redFsm->usingAct() ) {
+			out <<
+				" " << ACT() << " = 0;\n";
+		}
+	}
+	out << " }\n";
+}
+
+/* Prefix for emitted data names: the fsm name plus an underscore, or nothing
+ * when prefixes are disabled. */
+string CodeGen::DATA_PREFIX()
+{
+	if ( !noPrefix )
+		return FSM_NAME() + "_";
+	return "";
+}
+
+/* Emit the alphabet data type. */
+string CodeGen::ALPH_TYPE()
+{
+	/* The type name is one word, optionally followed by a second one. */
+	string ret = alphType->data1;
+	if ( alphType->data2 != 0 ) {
+		ret += " ";
+		ret += alphType->data2;
+	}
+	return ret;
+}
+
+/* Write a named constant in the notation of the active backend. */
+void CodeGen::VALUE( string type, string name, string value )
+{
+	if ( backend == Direct )
+		out << "static const " << type << " " << name << " = " << value << ";\n";
+	else
+		out << "value " << type << " " << name << " = " << value << ";\n";
+}
+
+/* Format an integer as decimal text. */
+string CodeGen::STR( int v )
+{
+	ostringstream s;
+	s << v;
+	return s.str();
+}
+
+/* Write the state id constants: start (when there is a start state), first
+ * final and error (unless disabled), then one constant per named entry
+ * point. */
+void CodeGen::STATE_IDS()
+{
+	if ( redFsm->startState != 0 )
+		VALUE( "int", START(), START_STATE_ID() );
+
+	if ( !noFinal )
+		VALUE( "int", FIRST_FINAL(), FIRST_FINAL_STATE() );
+
+	if ( !noError )
+		VALUE( "int", ERROR(), ERROR_STATE() );
+
+	out << "\n";
+
+	if ( red->entryPointNames.length() > 0 ) {
+		for ( EntryNameVect::Iter en = red->entryPointNames; en.lte(); en++ ) {
+			string name = DATA_PREFIX() + "en_" + *en;
+			VALUE( "int", name, STR( red->entryPointIds[en.pos()] ) );
+		}
+		out << "\n";
+	}
+}
+
+/* Write the start state id. */
+void CodeGen::writeStart()
+{
+	out << START_STATE_ID();
+}
+
+/* Write the first final state id. */
+void CodeGen::writeFirstFinal()
+{
+	out << FIRST_FINAL_STATE();
+}
+
+/* Write the error state id. */
+void CodeGen::writeError()
+{
+	out << ERROR_STATE();
+}
+
+/* Write one export definition per entry of the export list, followed by a
+ * blank line. Nothing is written when the list is empty. */
+void CodeGen::writeExports()
+{
+	if ( red->exportList.length() > 0 ) {
+		for ( ExportList::Iter ex = red->exportList; ex.lte(); ex++ ) {
+			out << EXPORT( ALPH_TYPE(),
+				DATA_PREFIX() + "ex_" + ex->name, KEY(ex->key) ) << "\n";
+		}
+		out << "\n";
+	}
+}
+ +void CodeGen::NFA_PUSH( std::string state ) +{ + if ( redFsm->anyNfaStates() ) { + out << + " if ( " << ARR_REF( nfaOffsets ) << "[" << state << "] != 0 ) {\n" + " " << alt << " = 0; \n" + " " << new_recs << " = " << CAST("int") << ARR_REF( nfaTargs ) << "[" << CAST("int") << + ARR_REF( nfaOffsets ) << "[" << state << "]];\n"; + + if ( red->nfaPrePushExpr != 0 ) { + out << OPEN_HOST_BLOCK( red->nfaPrePushExpr ); + INLINE_LIST( out, red->nfaPrePushExpr->inlineList, 0, false, false ); + out << CLOSE_HOST_BLOCK(); + out << "\n"; + genOutputLineDirective( out ); + } + + out << + " while ( " << alt << " < " << new_recs << " ) { \n"; + + + out << + " nfa_bp[nfa_len].state = " << CAST("int") << ARR_REF( nfaTargs ) << "[" << CAST("int") << + ARR_REF( nfaOffsets ) << "[" << state << "] + 1 + " << alt << "];\n" + " nfa_bp[nfa_len].p = " << P() << ";\n"; + + if ( redFsm->bAnyNfaPops ) { + out << + " nfa_bp[nfa_len].popTrans = " << ARR_REF( nfaPopTrans ) << "[" << CAST("long") << + ARR_REF( nfaOffsets ) << "[" << state << "] + 1 + " << alt << "];\n" + "\n" + ; + } + + if ( redFsm->bAnyNfaPushes ) { + out << + " switch ( " << ARR_REF( nfaPushActions ) << "[" << CAST("int") << + ARR_REF( nfaOffsets ) << "[" << state << "] + 1 + " << alt << "] ) {\n"; + + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; + redAct.lte(); redAct++ ) + { + if ( redAct->numNfaPushRefs > 0 ) { + /* Write the entry label. */ + out << "\t " << CASE( STR( redAct->actListId+1 ) ) << " {\n"; + + /* Write each action in the list of action items. 
*/ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, IlOpts( 0, false, false ) ); + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + out << + " }\n"; + } + + + out << + " nfa_len += 1;\n" + " " << alt << " += 1;\n" + " }\n" + " }\n" + ; + } +} + +void CodeGen::NFA_POST_POP() +{ + if ( red->nfaPostPopExpr != 0 ) { + out << OPEN_HOST_BLOCK( red->nfaPostPopExpr ); + INLINE_LIST( out, red->nfaPostPopExpr->inlineList, 0, false, false ); + out << CLOSE_HOST_BLOCK(); + } +} diff --git a/ragel/codegen.h b/ragel/codegen.h new file mode 100644 index 00000000..be5f0239 --- /dev/null +++ b/ragel/codegen.h @@ -0,0 +1,461 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _C_CODEGEN_H +#define _C_CODEGEN_H + +#include <iostream> +#include <string> +#include <stdio.h> +#include "common.h" +#include "gendata.h" +#include "vector.h" + +using std::string; +using std::ostream; + +/* Integer array line length. */ +//#define IALL 8 + +#define IALL_INTEGRAL 8 +#define IALL_STRING 128 + + +/* Forwards. */ +struct RedFsmAp; +struct RedStateAp; +struct CodeGenData; +struct GenAction; +struct NameInst; +struct GenInlineItem; +struct GenInlineList; +struct RedAction; +struct LongestMatch; +struct LongestMatchPart; + +string itoa( int i ); + +struct Variable +{ + Variable( const char *name ) : name(name), isReferenced(false) {} + + operator const std::string() { isReferenced = true; return name; } + void reference() { isReferenced = true; } + + const char *name; + bool isReferenced; +}; + +struct GotoLabel +{ + GotoLabel( const char *name ) : name(name), isReferenced(false) {} + + operator std::string() { isReferenced = true; return name; } + void reference() { isReferenced = true; } + + const char *name; + bool isReferenced; +}; + +std::ostream &operator<<( std::ostream &out, GotoLabel &l ); +std::ostream &operator<<( std::ostream &out, Variable &v ); + +struct TableArray; +typedef Vector<TableArray*> ArrayVector; +struct CodeGen; + +struct TableArray +{ + enum State { + InitialState = 1, + AnalyzePass, + GeneratePass + }; + + TableArray( const char *name, CodeGen &codeGen ); + + void start(); + void startAnalyze(); + void startGenerate(); + + void setType( std::string type, int width, bool isChar ) + { + this->type = type; + this->width = width; + this->isChar = isChar; + } + + std::string ref(); + + void value( long long v ); + + void valueAnalyze( long long v ); + void valueGenerate( long long v ); + void stringGenerate( long long value ); + + void finish(); + void finishAnalyze(); + void finishGenerate(); + + void setState( TableArray::State state ) + { this->state = state; } + + long long size(); + + State state; + const 
char *name; + std::string type; + int width; + bool isSigned; + bool isChar; + bool stringTables; + int iall; + long long values; + long long min; + long long max; + CodeGen &codeGen; + std::ostream &out; + int ln; + bool isReferenced; + bool started; +}; + +struct IlOpts +{ + IlOpts( int targState, bool inFinish, bool csForced ) + : targState(targState), inFinish(inFinish), csForced(csForced) {} + + int targState; + bool inFinish; + bool csForced; +}; + + +/* + * class CodeGen + */ +class CodeGen : public CodeGenData +{ +public: + CodeGen( const CodeGenArgs &args ); + + virtual ~CodeGen() {} + + virtual void writeInit(); + virtual void writeStart(); + virtual void writeFirstFinal(); + virtual void writeError(); + virtual void statsSummary(); + +protected: + friend class TableArray; + typedef Vector<TableArray*> ArrayVector; + ArrayVector arrayVector; + + Variable cpc; + Variable pop_test; + Variable new_recs; + Variable alt; + + string FSM_NAME(); + string START_STATE_ID(); + void taActions(); + string KEY( Key key ); + string LDIR_PATH( char *path ); + + void ACTION( ostream &ret, GenAction *action, IlOpts opts ); + void NFA_CONDITION( ostream &ret, GenAction *condition, bool last ); + void NFA_POP_TEST_EXEC(); + void CONDITION( ostream &ret, GenAction *condition ); + string ALPH_TYPE(); + + bool isAlphTypeSigned(); + long long tableData; + RagelBackend backend; + bool stringTables; + BackendFeature backendFeature; + + TableArray nfaTargs; + TableArray nfaOffsets; + TableArray nfaPushActions; + TableArray nfaPopTrans; + + virtual string GET_KEY(); + + string P(); + string PE(); + string vEOF(); + + string ACCESS(); + string vCS(); + string STACK(); + string TOP(); + string TOKSTART(); + string TOKEND(); + string ACT(); + + string DATA_PREFIX(); + string START() { return DATA_PREFIX() + "start"; } + string ERROR() { return DATA_PREFIX() + "error"; } + string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; } + + /* Declare a variable only if referenced. 
*/ + void DECLARE( std::string type, Variable &var, std::string init = "" ); + + string CAST( string type ); + + string ARR_TYPE( const TableArray &ta ) + { return ta.type; } + + string ARR_REF( TableArray &ta ) + { return ta.ref(); } + + void INLINE_EXPR( ostream &ret, GenInlineList *inlineList ); + void INLINE_BLOCK( ostream &ret, GenInlineExpr *inlineExpr ); + void INLINE_PLAIN( ostream &ret, GenInlineExpr *inlineExpr ); + + void INLINE_LIST( ostream &ret, GenInlineList *inlineList, + int targState, bool inFinish, bool csForced ); + virtual void GOTO( ostream &ret, int gotoDest, bool inFinish ) = 0; + virtual void CALL( ostream &ret, int callDest, int targState, bool inFinish ) = 0; + virtual void NCALL( ostream &ret, int callDest, int targState, bool inFinish ) = 0; + virtual void NEXT( ostream &ret, int nextDest, bool inFinish ) = 0; + virtual void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) = 0; + virtual void NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) = 0; + virtual void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, + int targState, bool inFinish ) = 0; + virtual void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, + int targState, bool inFinish ) = 0; + virtual void RET( ostream &ret, bool inFinish ) = 0; + virtual void NRET( ostream &ret, bool inFinish ) = 0; + virtual void BREAK( ostream &ret, int targState, bool csForced ) = 0; + virtual void NBREAK( ostream &ret, int targState, bool csForced ) = 0; + virtual void CURS( ostream &ret, bool inFinish ) = 0; + virtual void TARGS( ostream &ret, bool inFinish, int targState ) = 0; + void EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ); + void LM_SWITCH( ostream &ret, GenInlineItem *item, int targState, + int inFinish, bool csForced ); + void LM_EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ); + void SET_ACT( ostream &ret, GenInlineItem *item ); + void INIT_TOKSTART( ostream &ret, GenInlineItem *item ); + void INIT_ACT( 
ostream &ret, GenInlineItem *item ); + void SET_TOKSTART( ostream &ret, GenInlineItem *item ); + void SET_TOKEND( ostream &ret, GenInlineItem *item ); + void GET_TOKEND( ostream &ret, GenInlineItem *item ); + + void HOST_STMT( ostream &ret, GenInlineItem *item, int targState, bool inFinish, bool csForced ); + void HOST_EXPR( ostream &ret, GenInlineItem *item, int targState, bool inFinish, bool csForced ); + void HOST_TEXT( ostream &ret, GenInlineItem *item, int targState, bool inFinish, bool csForced ); + void GEN_STMT( ostream &ret, GenInlineItem *item, int targState, bool inFinish, bool csForced ); + void GEN_EXPR( ostream &ret, GenInlineItem *item, int targState, bool inFinish, bool csForced ); + + void STATE_IDS(); + + string ERROR_STATE(); + string FIRST_FINAL_STATE(); + + string STR( int v ); + + void VALUE( string type, string name, string value ); + + string ACCESS_OPER() + { return backend == Direct ? "" : " -> "; } + + string OPEN_HOST_EXPR() + { return backend == Direct ? "(" : "host( \"-\", 1 ) ={"; } + + string OPEN_HOST_EXPR( string fileName, int line ) + { + return backend == Direct ? "(" : "host( \"" + fileName + "\", " + STR(line) + " ) ={"; + } + + string CLOSE_HOST_EXPR() + { return backend == Direct ? ")" : "}="; } + + string OPEN_HOST_BLOCK( string fileName, int line ) + { + if ( backend == Direct ) { + std::stringstream ss; + ss << "{\n" ; + (*genLineDirective)( ss, lineDirectives, line, fileName.c_str() ); + return ss.str(); + } + else { + return "host( \"" + fileName + "\", " + STR(line) + " ) ${"; + } + } + + string OPEN_HOST_BLOCK( GenInlineExpr *inlineExpr ) + { + return OPEN_HOST_BLOCK( inlineExpr->loc.fileName, inlineExpr->loc.line ); + } + + string CLOSE_HOST_BLOCK() + { return backend == Direct ? "}\n" : "}$"; } + + string OPEN_HOST_PLAIN() + { return backend == Direct ? "" : "host( \"-\", 1 ) @{"; } + + string CLOSE_HOST_PLAIN() + { return backend == Direct ? "" : "}@"; } + + string OPEN_GEN_EXPR() + { return backend == Direct ? 
"(" : "={"; } + + string CLOSE_GEN_EXPR() + { return backend == Direct ? ")" : "}="; } + + string OPEN_GEN_BLOCK() + { return backend == Direct ? "{" : "${"; } + + string CLOSE_GEN_BLOCK() + { return backend == Direct ? "}" : "}$"; } + + string OPEN_GEN_PLAIN() + { return backend == Direct ? "" : "@{"; } + + string CLOSE_GEN_PLAIN() + { return backend == Direct ? "" : "}@"; } + + string INT() + { return "int"; } + + string UINT() + { return backend == Direct ? "unsigned int" : "uint"; } + + string INDEX( string type, string name ) + { + if ( backend == Direct ) + return "const " + type + " *" + name; + else + return "index " + type + " " + name; + } + + string INDEX( string type ) + { + if ( backend == Direct ) + return "const " + type + " *"; + else + return "index " + type + " "; + } + + string LABEL( string name ) + { + return name + ": "; + } + + string EMIT_LABEL( GotoLabel label ) + { + if ( label.isReferenced ) + return std::string(label.name) + ": {}\n"; + else + return ""; + } + + string OFFSET( string arr, string off ) + { + if ( backend == Direct ) + return "( " + arr + " + (" + off + "))"; + else + return "offset( " + arr + ", " + off + " )"; + } + + string TRUE() + { + if ( backend == Direct ) + return "1"; + else + return "TRUE"; + } + + string DEREF( string arr, string off ) + { + if ( backend == Direct ) + return "(*( " + off + "))"; + else + return "deref( " + arr + ", " + off + " )"; + } + + string CASE( string val ) + { + if ( backend == Direct ) + return "case " + val + ": "; + else + return "case " + val; + } + + string DEFAULT() + { + if ( backend == Direct ) + return "default:"; + else + return "default"; + } + + string CEND( ) + { + if ( backend == Direct ) + return " break; "; + else + return " "; + } + + string FALLTHROUGH() + { + if ( backend == Direct ) + return " "; + else + return "fallthrough;"; + } + + string NIL() + { + if ( backend == Direct ) + return "0"; + else + return "nil"; + } + + string EXPORT( string type, string name, 
string value ) + { + if ( backend == Direct ) + return "#define " + name + " " + value; + else + return "export " + type + " " + name + " " + value + ";"; + } + + void NFA_POST_POP(); + virtual void NFA_PUSH( std::string ); + virtual void NFA_POP() = 0; + virtual void LOCATE_TRANS() {} + virtual void LOCATE_COND() {} + virtual void EOF_TRANS() {} + + + virtual void COND_EXEC( std::string expr ) {} + virtual void COND_BIN_SEARCH( Variable &var, TableArray &keys, std::string ok, std::string error ) {} + +public: + virtual void writeExports(); +}; + +#endif diff --git a/ragel/common.cc b/ragel/common.cc new file mode 100644 index 00000000..6e0f5c0c --- /dev/null +++ b/ragel/common.cc @@ -0,0 +1,482 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pcheck.h" +#include "common.h" +#include "stdlib.h" +#include <string.h> +#include <assert.h> +#include "ragel.h" + +/* + * C + */ + +const char *defaultOutFnC( const char *inputFileName ) +{ + const char *ext = findFileExtension( inputFileName ); + if ( ext != 0 && strcmp( ext, ".rh" ) == 0 ) + return fileNameFromStem( inputFileName, ".h" ); + else + return fileNameFromStem( inputFileName, ".c" ); +} + +HostType hostTypesC[] = +{ + { "char", 0, "char", true, true, false, CHAR_MIN, CHAR_MAX, 0, 0, sizeof(char) }, + { "signed", "char", "char", true, true, false, CHAR_MIN, CHAR_MAX, 0, 0, sizeof(char) }, + { "unsigned", "char", "uchar", false, true, false, 0, 0, 0, UCHAR_MAX, sizeof(unsigned char) }, + { "short", 0, "short", true, true, false, SHRT_MIN, SHRT_MAX, 0, 0, sizeof(short) }, + { "signed", "short", "short", true, true, false, SHRT_MIN, SHRT_MAX, 0, 0, sizeof(short) }, + { "unsigned", "short", "ushort", false, true, false, 0, 0, 0, USHRT_MAX, sizeof(unsigned short) }, + { "int", 0, "int", true, true, false, INT_MIN, INT_MAX, 0, 0, sizeof(int) }, + { "signed", "int", "int", true, true, false, INT_MIN, INT_MAX, 0, 0, sizeof(int) }, + { "unsigned", "int", "uint", false, true, false, 0, 0, 0, UINT_MAX, sizeof(unsigned int) }, + { "long", 0, "long", true, true, false, LONG_MIN, LONG_MAX, 0, 0, sizeof(long) }, + { "signed", "long", "long", true, true, false, LONG_MIN, LONG_MAX, 0, 0, sizeof(long) }, + { "unsigned", "long", "ulong", false, true, false, 0, 0, 0, ULONG_MAX, sizeof(unsigned long) }, +}; + +const HostLang hostLangC = { + hostTypesC, + 12, + 0, + true, + false, /* loopLabels */ + Direct, + GotoFeature, + &makeCodeGen, + &defaultOutFnC, + &genLineDirectiveC +}; + +/* + * ASM + */ +const char *defaultOutFnAsm( const char *inputFileName ) +{ + return fileNameFromStem( inputFileName, ".s" ); +} + +HostType hostTypesAsm[] = +{ + { "char", 0, "char", true, true, false, CHAR_MIN, CHAR_MAX, 0, 0, sizeof(char) }, + { "unsigned", "char", 
"uchar", false, true, false, 0, 0, 0, UCHAR_MAX, sizeof(unsigned char) }, + { "short", 0, "short", true, true, false, SHRT_MIN, SHRT_MAX, 0, 0, sizeof(short) }, + { "unsigned", "short", "ushort", false, true, false, 0, 0, 0, USHRT_MAX, sizeof(unsigned short) }, + { "int", 0, "int", true, true, false, INT_MIN, INT_MAX, 0, 0, sizeof(int) }, + { "unsigned", "int", "uint", false, true, false, 0, 0, 0, UINT_MAX, sizeof(unsigned int) }, + { "long", 0, "long", true, true, false, LONG_MIN, LONG_MAX, 0, 0, sizeof(long) }, + { "unsigned", "long", "ulong", false, true, false, 0, 0, 0, ULONG_MAX, sizeof(unsigned long) }, +}; + +const HostLang hostLangAsm = { + hostTypesAsm, + 8, + 0, + true, + false, /* loopLabels */ + Direct, + GotoFeature, + &makeCodeGenAsm, + &defaultOutFnC, + &genLineDirectiveAsm +}; + +HostType *findAlphType( const HostLang *hostLang, const char *s1 ) +{ + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 && + hostLang->hostTypes[i].data2 == 0 ) + { + return hostLang->hostTypes + i; + } + } + + return 0; +} + +HostType *findAlphType( const HostLang *hostLang, const char *s1, const char *s2 ) +{ + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 && + hostLang->hostTypes[i].data2 != 0 && + strcmp( s2, hostLang->hostTypes[i].data2 ) == 0 ) + { + return hostLang->hostTypes + i; + } + } + + return 0; +} + +HostType *findAlphTypeInternal( const HostLang *hostLang, const char *s1 ) +{ + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( strcmp( s1, hostLang->hostTypes[i].internalName ) == 0 ) + return hostLang->hostTypes + i; + } + + return 0; +} + +/* Construct a new parameter checker with for paramSpec. */ +ParamCheck::ParamCheck( const char *paramSpec, int argc, const char **argv ) +: + state(noparam), + argOffset(0), + curArg(0), + iCurArg(1), + paramSpec(paramSpec), + argc(argc), + argv(argv) +{ +} + +/* Check a single option. 
Returns the index of the next parameter. Sets p to + * the arg character if valid, 0 otherwise. Sets parg to the parameter arg if + * there is one, NULL otherwise. */ +bool ParamCheck::check() +{ + bool requiresParam; + + if ( iCurArg >= argc ) { /* Off the end of the arg list. */ + state = noparam; + return false; + } + + if ( argOffset != 0 && *argOffset == 0 ) { + /* We are at the end of an arg string. */ + iCurArg += 1; + if ( iCurArg >= argc ) { + state = noparam; + return false; + } + argOffset = 0; + } + + if ( argOffset == 0 ) { + /* Set the current arg. */ + curArg = argv[iCurArg]; + + /* We are at the beginning of an arg string. */ + if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */ + argv[iCurArg][0] != '-' || /* Not a param. */ + argv[iCurArg][1] == 0 ) { /* Only a dash. */ + parameter = 0; + paramArg = 0; + + iCurArg += 1; + state = noparam; + return true; + } + argOffset = argv[iCurArg] + 1; + } + + /* Get the arg char. */ + char argChar = *argOffset; + + /* Loop over all the parms and look for a match. */ + const char *pSpec = paramSpec; + while ( *pSpec != 0 ) { + char pSpecChar = *pSpec; + + /* If there is a ':' following the char then + * it requires a parm. If a parm is required + * then move ahead two in the parmspec. Otherwise + * move ahead one in the parm spec. */ + if ( pSpec[1] == ':' ) { + requiresParam = true; + pSpec += 2; + } + else { + requiresParam = false; + pSpec += 1; + } + + /* Do we have a match. */ + if ( argChar == pSpecChar ) { + if ( requiresParam ) { + if ( argOffset[1] == 0 ) { + /* The param must follow. */ + if ( iCurArg + 1 == argc ) { + /* We are the last arg so there + * cannot be a parameter to it. */ + parameter = argChar; + paramArg = 0; + iCurArg += 1; + argOffset = 0; + state = invalid; + return true; + } + else { + /* the parameter to the arg is the next arg. 
*/ + parameter = pSpecChar; + paramArg = argv[iCurArg + 1]; + iCurArg += 2; + argOffset = 0; + state = match; + return true; + } + } + else { + /* The param for the arg is built in. */ + parameter = pSpecChar; + paramArg = argOffset + 1; + iCurArg += 1; + argOffset = 0; + state = match; + return true; + } + } + else { + /* Good, we matched the parm and no + * arg is required. */ + parameter = pSpecChar; + paramArg = 0; + argOffset += 1; + state = match; + return true; + } + } + } + + /* We did not find a match. Bad Argument. */ + parameter = argChar; + paramArg = 0; + argOffset += 1; + state = invalid; + return true; +} + +std::streamsize output_filter::countAndWrite( const char *s, std::streamsize n ) +{ + for ( int i = 0; i < n; i++ ) { + switch ( s[i] ) { + case '\n': + line += 1; + break; + case '{': + /* If we detec an open block then eliminate the single-indent + * addition, which is to account for single statements. */ + singleIndent = false; + level += 1; + break; + case '}': + level -= 1; + break; + } + } + + return std::filebuf::xsputn( s, n ); +} + +bool openSingleIndent( const char *s, int n ) +{ + if ( n >= 3 && memcmp( s, "if ", 3 ) == 0 ) + return true; + + if ( n >= 8 && memcmp( s, "else if ", 8 ) == 0 ) + return true; + + if ( n >= 5 && memcmp( s, "else\n", 4 ) == 0 ) + return true; + + return false; +} + +/* Counts newlines before sending sync. */ +int output_filter::sync( ) +{ + line += 1; + return std::filebuf::sync(); +} + +/* Counts newlines before sending data out to file. */ +std::streamsize output_filter::xsputn( const char *s, std::streamsize n ) +{ + std::streamsize ret = n; + int l; + +restart: + if ( indent ) { + /* Consume mode Looking for the first non-whitespace. */ + while ( n > 0 && ( *s == ' ' || *s == '\t' ) ) { + s += 1; + n -= 1; + } + + if ( n > 0 ) { + int tabs = level + ( singleIndent ? 1 : 0 ); + + if ( *s == '}' ) { + /* If the next char is de-dent, then reduce the tabs. This is + * not a stream state change. 
The level reduction will be + * computed in write. */ + tabs -= 1; + } + + /* Note that the count and write will eliminate this if it detects + * an open block. */ + if ( openSingleIndent( s, n ) ) + singleIndent = true; + else + singleIndent = false; + + if ( *s != '#' ) { + /* Found some data, print the indentation and turn off indentation + * mode. */ + for ( l = 0; l < tabs; l++ ) + countAndWrite( "\t", 1 ); + } + + + indent = 0; + + goto restart; + } + } + else { + char *nl; + if ( (nl = (char*)memchr( s, '\n', n )) ) { + /* Print up to and including the newline. */ + int wl = nl - s + 1; + countAndWrite( s, wl ); + + /* Go into consume state. If we see more non-indentation chars we + * will generate the appropriate indentation level. */ + s += wl; + n -= wl; + indent = true; + goto restart; + } + else { + /* Indentation off, or no indent trigger (newline). */ + countAndWrite( s, n ); + } + } + + // What to do here? + return ret; +} + +/* Scans a string looking for the file extension. If there is a file + * extension then pointer returned points to inside the string + * passed in. Otherwise returns null. */ +const char *findFileExtension( const char *stemFile ) +{ + const char *ppos = stemFile + strlen(stemFile) - 1; + + /* Scan backwards from the end looking for the first dot. + * If we encounter a '/' before the first dot, then stop the scan. */ + while ( 1 ) { + /* If we found a dot or got to the beginning of the string then + * we are done. */ + if ( ppos == stemFile || *ppos == '.' ) + break; + + /* If we hit a / then there is no extension. Done. */ + if ( *ppos == '/' ) { + ppos = stemFile; + break; + } + ppos--; + } + + /* If we got to the front of the string then bail we + * did not find an extension */ + if ( ppos == stemFile ) + ppos = 0; + + return ppos; +} + +/* Make a file name from a stem. Removes the old filename suffix and + * replaces it with a new one. Returns a newed up string. 
*/ +const char *fileNameFromStem( const char *stemFile, const char *suffix ) +{ + long len = strlen( stemFile ); + assert( len > 0 ); + + /* Get the extension. */ + const char *ppos = findFileExtension( stemFile ); + + /* If an extension was found, then shorten what we think the len is. */ + if ( ppos != 0 ) + len = ppos - stemFile; + + /* Make the return string from the stem and the suffix. */ + char *retVal = new char[ len + strlen( suffix ) + 1 ]; + strncpy( retVal, stemFile, len ); + strcpy( retVal + len, suffix ); + + return retVal; +} + +exit_object endp; + +void operator<<( std::ostream &out, exit_object & ) +{ + out << std::endl; + throw AbortCompile( 1 ); +} + +void genLineDirectiveC( std::ostream &out, bool lineDirectives, int line, const char *fileName ) +{ + if ( !lineDirectives ) + out << "/* "; + + out << "#line " << line << " \""; + for ( const char *pc = fileName; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else if ( *pc == '"' ) + out << "\\\""; + else + out << *pc; + } + out << '"'; + + if ( !lineDirectives ) + out << " */"; + + out << '\n'; +} + +void genLineDirectiveAsm( std::ostream &out, bool lineDirectives, int line, const char *fileName ) +{ + out << "/* #line " << line << " \""; + for ( const char *pc = fileName; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else if ( *pc == '"' ) + out << "\\\""; + else + out << *pc; + } + out << '"'; + out << " */\n"; +} + +void genLineDirectiveTrans( std::ostream &out, bool lineDirectives, int line, const char *fileName ) +{ +} diff --git a/ragel/common.h b/ragel/common.h new file mode 100644 index 00000000..142eb735 --- /dev/null +++ b/ragel/common.h @@ -0,0 +1,504 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * 
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COMMON_H +#define _COMMON_H + +#include <iostream> +#include <fstream> +#include <climits> +#include "dlist.h" + +struct colm_location; + +struct InputData; +struct CodeGenData; +struct HostLang; +struct CodeGenArgs; + +enum RagelBackend +{ + Direct, + Translated +}; + +enum BackendFeature +{ + GotoFeature, + BreakFeature, + VarFeature +}; + +#define S8BIT_MIN -128 +#define S8BIT_MAX 127 + +#define U8BIT_MIN 0 +#define U8BIT_MAX 255 + +#define S16BIT_MIN -32768 +#define S16BIT_MAX 32767 + +#define U16BIT_MIN 0 +#define U16BIT_MAX 65535 + +#define S31BIT_MIN -1073741824l +#define S31BIT_MAX 1073741823l + +#define S32BIT_MIN -2147483648l +#define S32BIT_MAX 2147483647l + +#define U32BIT_MIN 0 +#define U32BIT_MAX 4294967295l + +#define S64BIT_MIN (-9223372036854775807LL - 1LL) +#define S64BIT_MAX 9223372036854775807LL + +#define U64BIT_MIN 0 +#define U64BIT_MAX 18446744073709551615ULL + +struct ParserLoc +{ + const char *fileName; + int line; + int col; +}; + +/* Location in an input file. 
*/ +struct InputLoc +{ + InputLoc( colm_location *pcloc ); + + InputLoc() : fileName(0), line(-1), col(-1) {} + + InputLoc( const ParserLoc loc ) + { + fileName = loc.fileName; + line = loc.line; + col = loc.col; + + if ( fileName == 0 ) + fileName = "-"; + if ( line == 0 ) + line = 1; + } + + InputLoc( const InputLoc &loc ) + { + fileName = loc.fileName; + line = loc.line; + col = loc.col; + + if ( fileName == 0 ) + fileName = "-"; + if ( line == 0 ) + line = 1; + } + + InputLoc( const char *fileName, int line, int col ) + : fileName(fileName), line(line), col(col) {} + + const char *fileName; + int line; + int col; +}; + +extern InputLoc internal; + +typedef unsigned long long Size; + +struct Key +{ +private: + long key; + +public: + friend struct KeyOps; + + Key( ) {} + Key( const Key &key ) : key(key.key) {} + Key( long key ) : key(key) {} + + /* Returns the value used to represent the key. This value must be + * interpreted based on signedness. */ + long getVal() const { return key; }; + + bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); } + bool isLower() const { return ( 'a' <= key && key <= 'z' ); } + bool isPrintable() const + { + return ( 7 <= key && key <= 13 ) || ( 32 <= key && key < 127 ); + } + + Key toUpper() const + { return Key( 'A' + ( key - 'a' ) ); } + Key toLower() const + { return Key( 'a' + ( key - 'A' ) ); } +}; + +struct CondKey +{ +private: + long key; + +public: + friend inline bool operator<( const CondKey key1, const CondKey key2 ); + friend inline bool operator>( const CondKey key1, const CondKey key2 ); + friend inline bool operator==( const CondKey key1, const CondKey key2 ); + friend inline CondKey operator+( const CondKey key1, const CondKey key2 ); + friend inline CondKey operator-( const CondKey key1, const CondKey key2 ); + + friend struct KeyOps; + + CondKey( ) {} + CondKey( const CondKey &key ) : key(key.key) {} + CondKey( long key ) : key(key) {} + + /* Returns the value used to represent the key. 
This value must be + * interpreted based on signedness. */ + long getVal() const { return key; }; + + bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); } + bool isLower() const { return ( 'a' <= key && key <= 'z' ); } + bool isPrintable() const + { + return ( 7 <= key && key <= 13 ) || ( 32 <= key && key < 127 ); + } + + CondKey toUpper() const + { return CondKey( 'A' + ( key - 'a' ) ); } + CondKey toLower() const + { return CondKey( 'a' + ( key - 'A' ) ); } + + /* Decrement. Needed only for ranges. */ + inline void decrement(); + inline void increment(); +}; + +inline CondKey operator+(const CondKey key1, const CondKey key2) +{ + return CondKey( key1.key + key2.key ); +} + +inline CondKey operator-(const CondKey key1, const CondKey key2) +{ + return CondKey( key1.key - key2.key ); +} + +struct HostType +{ + const char *data1; + const char *data2; + const char *internalName; + bool isSigned; + bool isOrd; + bool isChar; + long long sMinVal; + long long sMaxVal; + unsigned long long uMinVal; + unsigned long long uMaxVal; + unsigned int size; +}; + +typedef void (*GenLineDirectiveT)( std::ostream &out, bool nld, int line, const char *file ); +typedef const char *(*DefaultOutFnT)( const char *inputFileName ); +typedef CodeGenData *(*MakeCodeGenT)( const HostLang *hostLang, const CodeGenArgs &args ); + +struct HostLang +{ + HostType *hostTypes; + int numHostTypes; + int defaultAlphType; + bool explicitUnsigned; + bool loopLabels; + + RagelBackend backend; + BackendFeature feature; + + MakeCodeGenT makeCodeGen; + DefaultOutFnT defaultOutFn; + GenLineDirectiveT genLineDirective; +}; + +void genLineDirectiveC( std::ostream &out, bool nld, int line, const char *file ); +void genLineDirectiveAsm( std::ostream &out, bool nld, int line, const char *file ); +void genLineDirectiveTrans( std::ostream &out, bool nld, int line, const char *file ); + +extern const HostLang hostLangC; +extern const HostLang hostLangAsm; + +HostType *findAlphType( const HostLang *hostLang, 
const char *s1 ); +HostType *findAlphType( const HostLang *hostLang, const char *s1, const char *s2 ); +HostType *findAlphTypeInternal( const HostLang *hostLang, const char *s1 ); + +const char *defaultOutFnC( const char *inputFileName ); +extern HostType hostTypesC[]; + +/* An abstraction of the key operators that manages key operations such as + * comparison and increment according the signedness of the key. */ +struct KeyOps +{ + /* Defaults to C "char" type: Signed 8 bit. */ + KeyOps() + : + isSigned(true), + explicitUnsigned(true), + minKey(CHAR_MIN), + maxKey(CHAR_MAX) + {} + + bool isSigned; + bool explicitUnsigned; + Key minKey, maxKey; + + void setAlphType( const HostLang *hostLang, const HostType *alphType ) + { + isSigned = alphType->isSigned; + explicitUnsigned = hostLang->explicitUnsigned; + + if ( isSigned ) { + minKey = (long) alphType->sMinVal; + maxKey = (long) alphType->sMaxVal; + } + else { + minKey = (long) alphType->uMinVal; + maxKey = (long) alphType->uMaxVal; + } + } + + /* Compute the distance between two keys. */ + Size span( Key key1, Key key2 ) + { + return isSigned ? + (unsigned long long)( + (long long)key2.key - + (long long)key1.key + 1) : + (unsigned long long)( + (unsigned long)key2.key) - + (unsigned long long)((unsigned long)key1.key) + 1; + } + + Size alphSize() + { return span( minKey, maxKey ); } + + inline bool lt( const Key key1, const Key key2 ) + { + return this->isSigned ? key1.key < key2.key : + (unsigned long)key1.key < (unsigned long)key2.key; + } + + inline bool le( const Key key1, const Key key2 ) + { + return this->isSigned ? key1.key <= key2.key : + (unsigned long)key1.key <= (unsigned long)key2.key; + } + + inline bool gt( const Key key1, const Key key2 ) + { + return this->isSigned ? key1.key > key2.key : + (unsigned long)key1.key > (unsigned long)key2.key; + } + + inline bool ge( const Key key1, const Key key2 ) + { + return this->isSigned ? 
key1.key >= key2.key : + (unsigned long)key1.key >= (unsigned long)key2.key; + } + + inline bool eq( const Key key1, const Key key2 ) + { + return key1.key == key2.key; + } + + inline bool ne( const Key key1, const Key key2 ) + { + return key1.key != key2.key; + } + + inline Key add(const Key key1, const Key key2) + { + /* FIXME: must be made aware of isSigned. */ + return Key( key1.key + key2.key ); + } + + inline Key sub(const Key key1, const Key key2) + { + /* FIXME: must be made aware of isSigned. */ + return Key( key1.key - key2.key ); + } + + /* Decrement. Needed only for ranges. */ + inline void decrement( Key &key ) + { + key.key = this->isSigned ? key.key - 1 : ((unsigned long)key.key)-1; + } + + /* Increment. Needed only for ranges. */ + inline void increment( Key &key ) + { + key.key = this->isSigned ? key.key+1 : ((unsigned long)key.key)+1; + } + + /* Returns the key casted to a long long. This form of the key does not + * require any signedness interpretation. */ + inline long long getLongLong( const Key &key ) + { + return this->isSigned ? (long long)key.key : (long long)(unsigned long)key.key; + } +}; + +/* CondKey */ + +inline bool operator<( const CondKey key1, const CondKey key2 ) +{ + return key1.key < key2.key; +} + +inline bool operator>( const CondKey key1, const CondKey key2 ) +{ + return key1.key > key2.key; +} + +inline bool operator==( const CondKey key1, const CondKey key2 ) +{ + return key1.key == key2.key; +} + +/* Increment. Needed only for ranges. */ +inline void CondKey::increment() +{ + key = key + 1; +} + + +/* Filter on the output stream that keeps track of the number of lines + * output. 
*/ +class output_filter +: + public std::filebuf +{ +public: + output_filter( const char *fileName ) + : + fileName(fileName), + line(1), + level(0), + indent(false), + singleIndent(false) + {} + + virtual int sync(); + virtual std::streamsize xsputn( const char* s, std::streamsize n ); + + std::streamsize countAndWrite( const char* s, std::streamsize n ); + + const char *fileName; + int line; + int level; + bool indent; + bool singleIndent; +}; + +class nullbuf +: + public std::streambuf +{ +public: + virtual std::streamsize xsputn( const char * s, std::streamsize n ) + { return n; } + + virtual int overflow( int c ) + { return 1; } +}; + +class cfilebuf : public std::streambuf +{ +public: + cfilebuf( char *fileName, FILE* file ) : fileName(fileName), file(file) { } + char *fileName; + FILE *file; + + int sync() + { + fflush( file ); + return 0; + } + + int overflow( int c ) + { + if ( c != EOF ) + fputc( c, file ); + return 0; + } + + std::streamsize xsputn( const char* s, std::streamsize n ) + { + std::streamsize written = fwrite( s, 1, n, file ); + return written; + } +}; + +class costream : public std::ostream +{ +public: + costream( cfilebuf *b ) : + std::ostream(b), b(b) {} + + ~costream() + { delete b; } + + void fclose() + { ::fclose( b->file ); } + + cfilebuf *b; +}; + + +const char *findFileExtension( const char *stemFile ); +const char *fileNameFromStem( const char *stemFile, const char *suffix ); + +struct Export +{ + Export( std::string name, Key key ) + : name(name), key(key) {} + + std::string name; + Key key; + + Export *prev, *next; +}; + +typedef DList<Export> ExportList; + +struct exit_object { }; +extern exit_object endp; +void operator<<( std::ostream &out, exit_object & ); + +enum RagelFrontend +{ + KelbtBased, + ReduceBased +}; + +CodeGenData *makeCodeGen( const HostLang *hostLang, const CodeGenArgs &args ); +CodeGenData *makeCodeGenAsm( const HostLang *hostLang, const CodeGenArgs &args ); + +#endif diff --git a/ragel/config.h.cmake.in 
b/ragel/config.h.cmake.in new file mode 100644 index 00000000..ad4bf494 --- /dev/null +++ b/ragel/config.h.cmake.in @@ -0,0 +1,13 @@ +/* config.h Generated from config.h.cmake.in by cmake */ + +#ifndef _COLM_CONFIG_H +#define _COLM_CONFIG_H + +#cmakedefine DEBUG 1 + +#cmakedefine HAVE_SYS_WAIT_H 1 + +#cmakedefine SIZEOF_INT @SIZEOF_INT@ +#cmakedefine SIZEOF_LONG @SIZEOF_LONG@ + +#endif /* _COLM_CONFIG_H */ diff --git a/ragel/dot.cc b/ragel/dot.cc new file mode 100644 index 00000000..edd4225b --- /dev/null +++ b/ragel/dot.cc @@ -0,0 +1,399 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ragel.h" +#include "dot.h" +#include "gendata.h" +#include "inputdata.h" +#include "parsedata.h" + +using std::istream; +using std::ifstream; +using std::ostream; +using std::ios; +using std::cin; +using std::endl; + +void GraphvizDotGen::key( Key key ) +{ + if ( id->displayPrintables && key.isPrintable() ) { + // Output values as characters, ensuring we escape the quote (") character + char cVal = (char) key.getVal(); + switch ( cVal ) { + case '"': case '\\': + out << "'\\" << cVal << "'"; + break; + case '\a': + out << "'\\\\a'"; + break; + case '\b': + out << "'\\\\b'"; + break; + case '\t': + out << "'\\\\t'"; + break; + case '\n': + out << "'\\\\n'"; + break; + case '\v': + out << "'\\\\v'"; + break; + case '\f': + out << "'\\\\f'"; + break; + case '\r': + out << "'\\\\r'"; + break; + case ' ': + out << "SP"; + break; + default: + out << "'" << cVal << "'"; + break; + } + } + else { + if ( keyOps->isSigned ) + out << key.getVal(); + else + out << (unsigned long) key.getVal(); + } +} + +void GraphvizDotGen::condSpec( CondSpace *condSpace, long condVals ) +{ + if ( condSpace != 0 ) { + out << "("; + for ( CondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + bool set = condVals & (1 << csi.pos()); + if ( !set ) + out << "!"; + (*csi)->actionName( out ); + if ( !csi.last() ) + out << ", "; + } + out << ")"; + } +} + +void GraphvizDotGen::onChar( Key lowKey, Key highKey, CondSpace *condSpace, long condVals ) +{ + /* Output the key. Possibly a range. */ + key( lowKey ); + if ( keyOps->ne( highKey, lowKey ) ) { + out << ".."; + key( highKey ); + } + + condSpec( condSpace, condVals ); +} + + +void GraphvizDotGen::fromStateAction( StateAp *fromState ) +{ + int n = 0; + ActionTable *actionTables[3] = { 0, 0, 0 }; + + if ( fromState->fromStateActionTable.length() != 0 ) + actionTables[n++] = &fromState->fromStateActionTable; + + + /* Loop the existing actions and write out what's there. 
*/ + for ( int a = 0; a < n; a++ ) { + for ( ActionTable::Iter actIt = actionTables[a]->first(); actIt.lte(); actIt++ ) { + Action *action = actIt->value; + action->actionName( out ); + if ( a < n-1 || !actIt.last() ) + out << ", "; + } + } + + if ( n > 0 ) + out << " / "; +} + +/* Write " / " followed by the comma-separated names of the transition's own + * actions and then the to-state actions of its target state. Writes nothing + * when both tables are empty. The fromState argument is not used here. */ +void GraphvizDotGen::transAction( StateAp *fromState, TransData *trans ) +{ + int n = 0; + ActionTable *actionTables[3] = { 0, 0, 0 }; + + if ( trans->actionTable.length() != 0 ) + actionTables[n++] = &trans->actionTable; + if ( trans->toState != 0 && trans->toState->toStateActionTable.length() != 0 ) + actionTables[n++] = &trans->toState->toStateActionTable; + + if ( n > 0 ) + out << " / "; + + /* Loop the existing actions and write out what's there. */ + for ( int a = 0; a < n; a++ ) { + for ( ActionTable::Iter actIt = actionTables[a]->first(); actIt.lte(); actIt++ ) { + Action *action = actIt->value; + action->actionName( out ); + if ( a < n-1 || !actIt.last() ) + out << ", "; + } + } +} + +/* Write " / " followed by the comma-separated names of the actions in + * actionTable. The separator is written even if the table is empty. */ +void GraphvizDotGen::action( ActionTable *actionTable ) +{ + /* The action. */ + out << " / "; + for ( ActionTable::Iter actIt = actionTable->first(); actIt.lte(); actIt++ ) { + Action *action = actIt->value; + action->actionName( out ); + if ( !actIt.last() ) + out << ", "; + } +} + +void GraphvizDotGen::transList( StateAp *state ) +{ + /* Emit one edge per plain transition out of this state, or one edge per + * condition when the transition is conditional. */ + for ( TransList::Iter tel = state->outList; tel.lte(); tel++ ) { + if ( tel->plain() ) { + TransDataAp *tdap = tel->tdap(); + + /* Write out the from and to states. */ + out << "\t" << state->alg.stateNum << " -> "; + + if ( tdap->toState == 0 ) + out << "err_" << state->alg.stateNum; + else + out << tdap->toState->alg.stateNum; + + /* Begin the label. */ + out << " [ label = \""; + + fromStateAction( state ); + + onChar( tel->lowKey, tel->highKey, 0, 0 ); + + /* Write the action and close the transition. 
*/ + transAction( state, tdap ); + + out << "\" ];\n"; + } + else { + for ( CondList::Iter ctel = tel->tcap()->condList; ctel.lte(); ctel++ ) { + /* Write out the from and to states. */ + out << "\t" << state->alg.stateNum << " -> "; + + if ( ctel->toState == 0 ) + out << "err_" << state->alg.stateNum; + else + out << ctel->toState->alg.stateNum; + + /* Begin the label. */ + out << " [ label = \""; + + fromStateAction( state ); + + onChar( tel->lowKey, tel->highKey, tel->condSpace, ctel->key.getVal() ); + + /* Write the action and close the transition. */ + transAction( state, ctel ); + out << "\" ];\n"; + } + } + } + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter nfa = *state->nfaOut; nfa.lte(); nfa++ ) { + out << "\t" << state->alg.stateNum << + " -> " << nfa->toState->alg.stateNum << + " [ label = \"EP," << nfa->order << " "; + + fromStateAction( state ); + +// if ( nfa->popTest.length() > 0 || +// nfa->popAction.length() > 0 || +// nfa->popCondKeys.length() > 0 ) +// { +// out << " / "; +// } + + if ( nfa->popCondKeys.length() > 0 ) { + for ( CondKeySet::Iter key = nfa->popCondKeys; key.lte(); key++ ) { + out << "("; + long condVals = *key; + for ( CondSet::Iter csi = nfa->popCondSpace->condSet; csi.lte(); csi++ ) { + bool set = condVals & (1 << csi.pos()); + if ( !set ) + out << "!"; + (*csi)->actionName( out ); + if ( !csi.last() ) + out << ", "; + } + out << ") "; + } + } + + if ( nfa->popAction.length() > 0 ) { + for ( ActionTable::Iter pa = nfa->popAction; pa.lte(); pa++ ) { + pa->value->actionName( out ); + if ( !pa.last() ) + out << ","; + } + } + + if ( nfa->popTest.length() > 0 ) { + for ( ActionTable::Iter pt = nfa->popTest; pt.lte(); pt++ ) { + pt->value->actionName( out ); + if ( !pt.last() ) + out << ","; + } + } + + out << "\" ];"; + } + } +} + +bool GraphvizDotGen::makeNameInst( std::string &res, NameInst *nameInst ) +{ + bool written = false; + if ( nameInst->parent != 0 ) + written = makeNameInst( res, nameInst->parent ); + + if ( 
!nameInst->name.empty() ) { + if ( written ) + res += '_'; + res += nameInst->name; + written = true; + } + + return written; +} + +/* Write the machine to `out` as a Graphviz digraph named fsmName. */ +void GraphvizDotGen::write( ) +{ + out << + "digraph " << fsmName << " {\n" + " rankdir=LR;\n"; + + /* Define the pseudo states. Transitions will be done after the states + * have been defined as either final or not final. */ + out << " node [ shape = point ];\n"; + + if ( fsm->startState != 0 ) + out << " ENTRY;\n"; + + /* Pseudo states for entry points in the entry map. */ + for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { + StateAp *state = en->value; + out << " en_" << state->alg.stateNum << ";\n"; + } + + /* Pseudo states for final states with eof actions. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + //if ( st->eofTrans != 0 && st->eofTrans->action != 0 ) + // out << " eof_" << st->id << ";\n"; + if ( st->eofActionTable.length() > 0 ) + out << " eof_" << st->alg.stateNum << ";\n"; + } + + out << " node [ shape = circle, height = 0.2 ];\n"; + + /* Pseudo states for states whose default actions go to error. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + bool needsErr = false; + for ( TransList::Iter tel = st->outList; tel.lte(); tel++ ) { + if ( tel->plain() ) { + if ( tel->tdap()->toState == 0 ) { + needsErr = true; + break; + } + } + else { + for ( CondList::Iter ctel = tel->tcap()->condList; ctel.lte(); ctel++ ) { + if ( ctel->toState == 0 ) { + needsErr = true; + break; + } + } + } + } + + if ( needsErr ) + out << " err_" << st->alg.stateNum << " [ label=\"\"];\n"; + } + + /* Attributes common to all nodes, plus double circle for final states. */ + out << " node [ fixedsize = true, height = 0.65, shape = doublecircle ];\n"; + + /* List Final states. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + if ( st->isFinState() ) + out << " " << st->alg.stateNum << ";\n"; + } + + /* List transitions. 
*/ + out << " node [ shape = circle ];\n"; + + /* Walk the states. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) + transList( st ); + + /* Transitions into the start state. */ + if ( fsm->startState != 0 ) + out << " ENTRY -> " << fsm->startState->alg.stateNum << " [ label = \"IN\" ];\n"; + + for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { + NameInst *nameInst = fsmCtx->nameIndex[en->key]; + std::string name; + makeNameInst( name, nameInst ); + StateAp *state = en->value; + out << " en_" << state->alg.stateNum << + " -> " << state->alg.stateNum << + " [ label = \"" << name << "\" ];\n"; + } + + /* Out action transitions. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + if ( st->eofActionTable.length() != 0 ) { + out << " " << st->alg.stateNum << " -> eof_" << + st->alg.stateNum << " [ label = \"EOF"; + + for ( CondKeySet::Iter i = st->outCondKeys; i.lte(); i++ ) { + if ( i.pos() > 0 ) + out << "|"; + condSpec( st->outCondSpace, *i ); + } + + action( &st->eofActionTable ); + out << "\" ];\n"; + } + } + + out << + "}\n"; +} + +void InputData::writeDot( ostream &out ) +{ + ParseData *pd = dotGenPd; + GraphvizDotGen dotGen( this, pd->fsmCtx, pd->sectionGraph, pd->sectionName, pd->machineId, out ); + dotGen.write(); +} diff --git a/ragel/dot.h b/ragel/dot.h new file mode 100644 index 00000000..745636df --- /dev/null +++ b/ragel/dot.h @@ -0,0 +1,71 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _GVDOTGEN_H +#define _GVDOTGEN_H + +#include <iostream> +#include "gendata.h" + +class GraphvizDotGenOrig : public CodeGenData +{ +public: + GraphvizDotGenOrig( const CodeGenArgs &args ) + : CodeGenData(args) { } + + /* Print an fsm to out stream. */ + void writeTransList( RedStateAp *state ); + void writeDotFile( ); + + virtual void writeStatement( InputLoc &, int, std::string * ); + +private: + /* Writing labels and actions. 
*/ + std::ostream &ONCHAR( Key lowKey, Key highKey ); + std::ostream &TRANS_ACTION( RedStateAp *fromState, RedTransAp *trans ); + std::ostream &ACTION( RedAction *action ); + std::ostream &KEY( Key key ); +}; + +/* Writes a state machine to the stream given at construction; write() is + * the entry point and the remaining members emit the individual pieces. */ +class GraphvizDotGen : public RedBase +{ +public: + GraphvizDotGen( FsmGbl *id, FsmCtx *fsmCtx, FsmAp *fsm, std::string fsmName, int machineId, std::ostream &out ) + : + RedBase(id, fsmCtx, fsm, fsmName, machineId), + out(out) + {} + + bool makeNameInst( std::string &res, NameInst *nameInst ); + void action( ActionTable *actionTable ); + void transAction( StateAp *fromState, TransData *trans ); + void key( Key key ); + void condSpec( CondSpace *condSpace, long condVals ); + void onChar( Key lowKey, Key highKey, CondSpace *condSpace, long condVals ); + void transList( StateAp *state ); + void write(); + void fromStateAction( StateAp *fromState ); + + /* Destination stream. Qualified explicitly so this header does not rely + * on a using-declaration from another header. */ + std::ostream &out; +}; + +#endif diff --git a/ragel/dotcodegen-orig.cc b/ragel/dotcodegen-orig.cc new file mode 100644 index 00000000..145072dd --- /dev/null +++ b/ragel/dotcodegen-orig.cc @@ -0,0 +1,322 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "dot.h" +#include "gendata.h" +#include "inputdata.h" +#include "rlparse.h" +#include "rlscan.h" + +using std::istream; +using std::ifstream; +using std::ostream; +using std::ios; +using std::cin; +using std::endl; + +/* Override this so that write statement processing is ignored */ +void GraphvizDotGenOrig::writeStatement( InputLoc &, int, std::vector<std::string> & ) +{ +} + +std::ostream &GraphvizDotGenOrig::KEY( Key key ) +{ + if ( displayPrintables && key.isPrintable() ) { + // Output values as characters, ensuring we escape the quote (") character + char cVal = (char) key.getVal(); + switch ( cVal ) { + case '"': case '\\': + out << "'\\" << cVal << "'"; + break; + case '\a': + out << "'\\\\a'"; + break; + case '\b': + out << "'\\\\b'"; + break; + case '\t': + out << "'\\\\t'"; + break; + case '\n': + out << "'\\\\n'"; + break; + case '\v': + out << "'\\\\v'"; + break; + case '\f': + out << "'\\\\f'"; + break; + case '\r': + out << "'\\\\r'"; + break; + case ' ': + out << "SP"; + break; + default: + out << "'" << cVal << "'"; + break; + } + } + else { + if ( keyOps->isSigned ) + out << key.getVal(); + else + out << (unsigned long) key.getVal(); + } + + return out; +} + +std::ostream &GraphvizDotGenOrig::TRANS_ACTION( RedStateAp *fromState, RedTransAp *trans ) +{ + int n = 0; + RedAction *actions[3]; + + if ( fromState->fromStateAction != 0 ) + actions[n++] = fromState->fromStateAction; + if ( trans->action != 0 ) + actions[n++] = trans->action; + if ( trans->targ != 0 && trans->targ->toStateAction != 0 ) + actions[n++] = trans->targ->toStateAction; + + if ( n > 0 ) + out << " / "; + + /* Loop the existing actions and write out what's there. 
*/ + for ( int a = 0; a < n; a++ ) { + for ( GenActionTable::Iter actIt = actions[a]->key.first(); actIt.lte(); actIt++ ) { + GenAction *action = actIt->value; + out << action->nameOrLoc(); + if ( a < n-1 || !actIt.last() ) + out << ", "; + } + } + return out; +} + +std::ostream &GraphvizDotGenOrig::ACTION( RedAction *action ) +{ + /* The action. */ + out << " / "; + for ( GenActionTable::Iter actIt = action->key.first(); actIt.lte(); actIt++ ) { + GenAction *action = actIt->value; + if ( action->name != 0 ) + out << action->name; + else + out << action->loc.line << ":" << action->loc.col; + if ( !actIt.last() ) + out << ", "; + } + return out; +} + +std::ostream &GraphvizDotGenOrig::ONCHAR( Key lowKey, Key highKey ) +{ + GenCondSpace *condSpace; + if ( lowKey > keyOps->maxKey && (condSpace=findCondSpace(lowKey, highKey) ) ) { + Key values = ( lowKey - condSpace->baseKey ) / keyOps->alphSize(); + + lowKey = keyOps->minKey + + (lowKey - condSpace->baseKey - keyOps->alphSize() * values.getVal()); + highKey = keyOps->minKey + + (highKey - condSpace->baseKey - keyOps->alphSize() * values.getVal()); + KEY( lowKey ); + if ( lowKey != highKey ) { + out << ".."; + KEY( highKey ); + } + out << "("; + + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + bool set = values & (1 << csi.pos()); + if ( !set ) + out << "!"; + out << (*csi)->nameOrLoc(); + if ( !csi.last() ) + out << ", "; + } + out << ")"; + } + else { + /* Output the key. Possibly a range. */ + KEY( lowKey ); + if ( highKey != lowKey ) { + out << ".."; + KEY( highKey ); + } + } + return out; +} + +void GraphvizDotGenOrig::writeTransList( RedStateAp *state ) +{ + /* Build the set of unique transitions out of this state. */ + RedTransSet stTransSet; + for ( RedTransList::Iter tel = state->outRange; tel.lte(); tel++ ) { + /* If we haven't seen the transitions before, the move forward + * emitting all the transitions on the same character. 
*/ + if ( stTransSet.insert( tel->value ) ) { + /* Write out the from and to states. */ + out << "\t" << state->id << " -> "; + + if ( tel->value->targ == 0 ) + out << "err_" << state->id; + else + out << tel->value->targ->id; + + /* Begin the label. */ + out << " [ label = \""; + ONCHAR( tel->lowKey, tel->highKey ); + + /* Walk the transition list, finding the same. */ + for ( RedTransList::Iter mtel = tel.next(); mtel.lte(); mtel++ ) { + if ( mtel->value == tel->value ) { + out << ", "; + ONCHAR( mtel->lowKey, mtel->highKey ); + } + } + + /* Write the action and close the transition. */ + TRANS_ACTION( state, tel->value ); + out << "\" ];\n"; + } + } + + /* Write the default transition. */ + if ( state->defTrans != 0 ) { + /* Write out the from and to states. */ + out << "\t" << state->id << " -> "; + + if ( state->defTrans->targ == 0 ) + out << "err_" << state->id; + else + out << state->defTrans->targ->id; + + /* Begin the label. */ + out << " [ label = \"DEF"; + + /* Write the action and close the transition. */ + TRANS_ACTION( state, state->defTrans ); + out << "\" ];\n"; + } +} + +void GraphvizDotGenOrig::writeDotFile( ) +{ + out << + "digraph " << fsmName << " {\n" + " rankdir=LR;\n"; + + /* Define the psuedo states. Transitions will be done after the states + * have been defined as either final or not final. */ + out << " node [ shape = point ];\n"; + + if ( redFsm->startState != 0 ) + out << " ENTRY;\n"; + + /* Psuedo states for entry points in the entry map. */ + for ( EntryIdVect::Iter en = entryPointIds; en.lte(); en++ ) { + RedStateAp *state = allStates + *en; + out << " en_" << state->id << ";\n"; + } + + /* Psuedo states for final states with eof actions. 
*/ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 && st->eofTrans->action != 0 ) + out << " eof_" << st->id << ";\n"; + if ( st->eofAction != 0 ) + out << " eof_" << st->id << ";\n"; + } + + out << " node [ shape = circle, height = 0.2 ];\n"; + + /* Psuedo states for states whose default actions go to error. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + bool needsErr = false; + if ( st->defTrans != 0 && st->defTrans->targ == 0 ) + needsErr = true; + else { + for ( RedTransList::Iter tel = st->outRange; tel.lte(); tel++ ) { + if ( tel->value->targ == 0 ) { + needsErr = true; + break; + } + } + } + + if ( needsErr ) + out << " err_" << st->id << " [ label=\"\"];\n"; + } + + /* Attributes common to all nodes, plus double circle for final states. */ + out << " node [ fixedsize = true, height = 0.65, shape = doublecircle ];\n"; + + /* List Final states. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->isFinal ) + out << " " << st->id << ";\n"; + } + + /* List transitions. */ + out << " node [ shape = circle ];\n"; + + /* Walk the states. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + writeTransList( st ); + + /* Transitions into the start state. */ + if ( redFsm->startState != 0 ) + out << " ENTRY -> " << redFsm->startState->id << " [ label = \"IN\" ];\n"; + + /* Transitions into the entry points. */ + for ( EntryIdVect::Iter en = entryPointIds; en.lte(); en++ ) { + RedStateAp *state = allStates + *en; + char *name = entryPointNames[en.pos()]; + out << " en_" << state->id << " -> " << state->id << + " [ label = \"" << name << "\" ];\n"; + } + + /* Out action transitions. 
*/ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 && st->eofTrans->action != 0 ) { + out << " " << st->id << " -> eof_" << + st->id << " [ label = \"EOF"; + ACTION( st->eofTrans->action ) << "\" ];\n"; + } + if ( st->eofAction != 0 ) { + out << " " << st->id << " -> eof_" << + st->id << " [ label = \"EOF"; + ACTION( st->eofAction ) << "\" ];\n"; + } + } + + out << + "}\n"; +} + +void GraphvizDotGenOrig::finishRagelDef() +{ + /* For dot file generation we want to pick default transitions. */ + redFsm->chooseDefaultSpan(); +} + diff --git a/ragel/flat.cc b/ragel/flat.cc new file mode 100644 index 00000000..24d7ef2c --- /dev/null +++ b/ragel/flat.cc @@ -0,0 +1,576 @@ +/* + * Copyright 2004-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ragel.h" +#include "flat.h" +#include "redfsm.h" +#include "gendata.h" + +void Flat::genAnalysis() +{ + redFsm->sortByStateId(); + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Do flat expand. */ + redFsm->makeFlatClass(); + + /* If any errors have occurred in the input file then don't write anything. */ + if ( red->id->errorCount > 0 ) + return; + + /* Analyze Machine will find the final action reference counts, among other + * things. We will use these in reporting the usage of fsm directives in + * action code. */ + red->analyzeMachine(); + + setKeyType(); + + /* Run the analysis pass over the table data. */ + setTableState( TableArray::AnalyzePass ); + tableDataPass(); + + /* Switch the tables over to the code gen mode. */ + setTableState( TableArray::GeneratePass ); +} + +/* Run every table builder once. Unlike writeData(), the to-state, + * from-state and eof tables are visited unconditionally here. */ +void Flat::tableDataPass() +{ + if ( type == Flat::Loop ) { + if ( redFsm->anyActions() ) + taActions(); + } + + taKeys(); + taCharClass(); + taFlatIndexOffset(); + + taIndicies(); + taIndexDefaults(); + taTransCondSpaces(); + + if ( red->condSpaceList.length() > 0 ) + taTransOffsets(); + + taCondTargs(); + taCondActions(); + + taToStateActions(); + taFromStateActions(); + taEofConds(); + taEofActions(); + taEofTrans(); + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); +} + +void Flat::writeData() +{ + if ( type == Flat::Loop ) { + /* If there are any transition functions then output the array. If there + * are none, don't bother emitting an empty array that won't be used. 
*/ + if ( redFsm->anyActions() ) + taActions(); + } + + taKeys(); + taCharClass(); + taFlatIndexOffset(); + + taIndicies(); + taIndexDefaults(); + taTransCondSpaces(); + if ( red->condSpaceList.length() > 0 ) + taTransOffsets(); + taCondTargs(); + taCondActions(); + + if ( redFsm->anyToStateActions() ) + taToStateActions(); + + if ( redFsm->anyFromStateActions() ) + taFromStateActions(); + + taEofConds(); + + if ( redFsm->anyEofActions() ) + taEofActions(); + + if ( redFsm->anyEofTrans() ) + taEofTrans(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); + + STATE_IDS(); +} + + +void Flat::setKeyType() +{ + transKeys.setType( ALPH_TYPE(), alphType->size, alphType->isChar ); + transKeys.isSigned = keyOps->isSigned; +} + +/* Put every table registered in arrayVector into the given state. */ +void Flat::setTableState( TableArray::State state ) +{ + for ( ArrayVector::Iter i = arrayVector; i.lte(); i++ ) { + TableArray *tableArray = *i; + tableArray->setState( state ); + } +} + +void Flat::taFlatIndexOffset() +{ + flatIndexOffset.start(); + + int curIndOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the index offset. */ + flatIndexOffset.value( curIndOffset ); + + /* Move the index offset ahead. */ + if ( st->transList != 0 ) + curIndOffset += ( st->high - st->low + 1 ); + } + + flatIndexOffset.finish(); +} + +void Flat::taCharClass() +{ + charClass.start(); + + if ( redFsm->classMap != 0 ) { + long long maxSpan = keyOps->span( redFsm->lowKey, redFsm->highKey ); + + for ( long long pos = 0; pos < maxSpan; pos++ ) + charClass.value( redFsm->classMap[pos] ); + } + + charClass.finish(); +} + +void Flat::taToStateActions() +{ + toStateActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write any to-state action. */ + TO_STATE_ACTION(st); + } + + toStateActions.finish(); +} + +void Flat::taFromStateActions() +{ + fromStateActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write any from-state action. 
*/ + FROM_STATE_ACTION( st ); + } + + fromStateActions.finish(); +} + +void Flat::taEofActions() +{ + eofActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write any eof action. */ + EOF_ACTION( st ); + } + + eofActions.finish(); +} + +void Flat::taEofConds() +{ + /* + * EOF Cond Spaces + */ + eofCondSpaces.start(); + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->outCondSpace != 0 ) + eofCondSpaces.value( st->outCondSpace->condSpaceId ); + else + eofCondSpaces.value( -1 ); + } + eofCondSpaces.finish(); + + /* + * EOF Cond Key Indixes + */ + eofCondKeyOffs.start(); + + int curOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long off = 0; + if ( st->outCondSpace != 0 ) { + off = curOffset; + curOffset += st->outCondKeys.length(); + } + eofCondKeyOffs.value( off ); + } + + eofCondKeyOffs.finish(); + + /* + * EOF Cond Key Lengths. + */ + eofCondKeyLens.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long len = 0; + if ( st->outCondSpace != 0 ) + len = st->outCondKeys.length(); + eofCondKeyLens.value( len ); + } + + eofCondKeyLens.finish(); + + /* + * EOF Cond Keys + */ + eofCondKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->outCondSpace != 0 ) { + for ( int c = 0; c < st->outCondKeys.length(); c++ ) { + CondKey key = st->outCondKeys[c]; + eofCondKeys.value( key.getVal() ); + } + } + } + + eofCondKeys.finish(); +} + +void Flat::taEofTrans() +{ + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + long *transPos = new long[redFsm->transSet.length()]; + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* Save the position. Needed for eofTargs. 
*/ + RedTransAp *trans = transPtrs[t]; + transPos[trans->id] = t; + } + + eofTrans.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long trans = 0; + + if ( st->eofTrans != 0 ) + trans = transPos[st->eofTrans->id] + 1; + + eofTrans.value( trans ); + } + + eofTrans.finish(); + + delete[] transPtrs; + delete[] transPos; +} + +void Flat::taKeys() +{ + transKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->transList ) { + /* Emit just low key and high key. */ + transKeys.value( st->low ); + transKeys.value( st->high ); + } + else { + /* Emit an impossible range so the driver fails the lookup. */ + transKeys.value( 1 ); + transKeys.value( 0 ); + } + } + + transKeys.finish(); +} + +void Flat::taIndicies() +{ + indicies.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->transList != 0 ) { + long long span = st->high - st->low + 1; + for ( long long pos = 0; pos < span; pos++ ) + indicies.value( st->transList[pos]->id ); + } + } + + indicies.finish(); +} + +void Flat::taIndexDefaults() +{ + indexDefaults.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) + indexDefaults.value( st->defTrans->id ); + else + indexDefaults.value( 0 ); + } + + indexDefaults.finish(); +} + + +void Flat::taTransCondSpaces() +{ + transCondSpaces.start(); + + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + transPtrs[trans->id] = trans; + } + + /* Keep a count of the num of items in the array written. */ + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* Save the position. Needed for eofTargs. 
*/ + RedTransAp *trans = transPtrs[t]; + + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + delete[] transPtrs; + + transCondSpaces.finish(); +} + +void Flat::taTransOffsets() +{ + transOffsets.start(); + + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + /* Keep a count of the num of items in the array written. */ + int curOffset = 0; + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* Save the position. Needed for eofTargs. */ + RedTransAp *trans = transPtrs[t]; + + transOffsets.value( curOffset ); + + curOffset += trans->condFullSize(); + } + + delete[] transPtrs; + + transOffsets.finish(); +} + +void Flat::taCondTargs() +{ + condTargs.start(); + + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + /* Keep a count of the num of items in the array written. */ + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* Save the position. Needed for eofTargs. */ + RedTransAp *trans = transPtrs[t]; + + long fullSize = trans->condFullSize(); + RedCondPair **fullPairs = new RedCondPair*[fullSize]; + for ( long k = 0; k < fullSize; k++ ) + fullPairs[k] = trans->errCond(); + + for ( int c = 0; c < trans->numConds(); c++ ) + fullPairs[trans->outCondKey( c ).getVal()] = trans->outCond( c ); + + for ( int k = 0; k < fullSize; k++ ) { + RedCondPair *cond = fullPairs[k]; + condTargs.value( cond->targ->id ); + } + + delete[] fullPairs; + } + delete[] transPtrs; + + condTargs.finish(); +} + +void Flat::taCondActions() +{ + condActions.start(); + + /* Transitions must be written ordered by their id. 
*/ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + /* Keep a count of the num of items in the array written. */ + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* Save the position. Needed for eofTargs. */ + RedTransAp *trans = transPtrs[t]; + + long fullSize = trans->condFullSize(); + RedCondPair **fullPairs = new RedCondPair*[fullSize]; + for ( long k = 0; k < fullSize; k++ ) + fullPairs[k] = trans->errCond(); + + for ( int c = 0; c < trans->numConds(); c++ ) + fullPairs[trans->outCondKey( c ).getVal()] = trans->outCond( c ); + + for ( int k = 0; k < fullSize; k++ ) { + RedCondPair *cond = fullPairs[k]; + COND_ACTION( cond ); + } + delete[] fullPairs; + } + delete[] transPtrs; + + condActions.finish(); +} + +/* Write out the array of actions. */ +void Flat::taActions() +{ + actions.start(); + + /* Add in the the empty actions array. */ + actions.value( 0 ); + + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Length first. */ + actions.value( act->key.length() ); + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) + actions.value( item->value->actionId ); + } + + actions.finish(); +} + +void Flat::taNfaTargs() +{ + nfaTargs.start(); + + /* Offset of zero means no NFA targs, put a filler there. */ + nfaTargs.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaTargs.value( st->nfaTargs->length() ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + nfaTargs.value( targ->state->id ); + } + } + + nfaTargs.finish(); +} + +/* These need to mirror nfa targs. 
*/ +void Flat::taNfaPushActions() +{ + nfaPushActions.start(); + + nfaPushActions.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaPushActions.value( 0 ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_PUSH_ACTION( targ ); + } + } + + nfaPushActions.finish(); +} + +void Flat::taNfaPopTrans() +{ + nfaPopTrans.start(); + + nfaPopTrans.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + + nfaPopTrans.value( 0 ); + + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_POP_TEST( targ ); + } + } + + nfaPopTrans.finish(); +} + + +void Flat::taNfaOffsets() +{ + nfaOffsets.start(); + + /* Offset of zero means no NFA targs, real targs start at 1. */ + long offset = 1; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs == 0 ) { + nfaOffsets.value( 0 ); + } + else { + nfaOffsets.value( offset ); + offset += 1 + st->nfaTargs->length(); + } + } + + nfaOffsets.finish(); +} + + + + + + + + diff --git a/ragel/flat.h b/ragel/flat.h new file mode 100644 index 00000000..e13a7297 --- /dev/null +++ b/ragel/flat.h @@ -0,0 +1,94 @@ +/* + * Copyright 2004-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _C_FLAT_H +#define _C_FLAT_H + +#include <iostream> +#include "codegen.h" +#include "tables.h" + +/* Forwards. */ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; + +class Flat + : public virtual Tables +{ +protected: + enum Type { + Loop = 1, Exp + }; + +public: + Flat( const CodeGenArgs &args, Type type ) + : + Tables( args ), + type(type) + {} + + virtual ~Flat() { } + +protected: + Type type; + + void taKeys(); + void taKeySpans(); + void taCharClass(); + void taActions(); + void taFlatIndexOffset(); + void taIndicies(); + void taIndexDefaults(); + void taTransCondSpaces(); + void taTransOffsets(); + void taCondTargs(); + void taCondActions(); + void taToStateActions(); + void taFromStateActions(); + void taEofActions(); + void taEofTrans(); + void taEofConds(); + void taNfaTargs(); + void taNfaOffsets(); + void taNfaPushActions(); + void taNfaPopTrans(); + + void setKeyType(); + + std::ostream &INDICIES(); + std::ostream &TRANS_COND_SPACES(); + std::ostream &TRANS_OFFSETS(); + std::ostream &TRANS_LENGTHS(); + std::ostream &COND_KEYS(); + std::ostream &COND_TARGS(); + std::ostream &COND_ACTIONS(); + + virtual void setTableState( TableArray::State ); + + virtual void genAnalysis(); + virtual void tableDataPass(); + virtual void writeData(); +}; + +#endif diff --git a/ragel/flatbreak.cc b/ragel/flatbreak.cc new file mode 100644 index 00000000..9b50af1c --- /dev/null +++ b/ragel/flatbreak.cc @@ -0,0 +1,118 @@ +/* + * Copyright 
2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "flatbreak.h" + +void FlatBreak::LOCATE_TRANS() +{ + if ( redFsm->classMap == 0 ) { + out << + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << "[" << vCS() << "]" << ";\n"; + } + else { + long lowKey = redFsm->lowKey.getVal(); + long highKey = redFsm->highKey.getVal(); + + bool limitLow = keyOps->eq( lowKey, keyOps->minKey ); + bool limitHigh = keyOps->eq( highKey, keyOps->maxKey ); + + out << + " " << keys << " = " << OFFSET( ARR_REF( transKeys ), "(" + vCS() + "<<1)" ) << ";\n" + " " << inds << " = " << OFFSET( ARR_REF( indicies ), + ARR_REF( flatIndexOffset ) + "[" + vCS() + "]" ) << ";\n" + "\n"; + + if ( !limitLow || !limitHigh ) { + out << " if ( "; + + if ( !limitHigh ) + out << GET_KEY() << " <= " << highKey; + + if ( !limitHigh && !limitLow ) + out << " && "; + + if ( !limitLow ) + out << GET_KEY() << " >= " << lowKey; + + out << " ) {\n"; + } + + out << + " " << ic << " = " << CAST("int") << ARR_REF( charClass ) << "[" << CAST("int") << GET_KEY() << + " - " << lowKey << "];\n" + " if ( " << ic << " <= " << CAST("int") << DEREF( ARR_REF( transKeys ), string(keys) + "+1" ) << " && " << + "" << ic << " >= " << CAST("int") << DEREF( ARR_REF( transKeys ), string(keys) + "" ) << " )\n" + " " << trans << " = " << CAST(UINT()) << DEREF( ARR_REF( indicies ), + string(inds) + " + " + CAST("int") + "( " + string(ic) + " - " + CAST("int") + + DEREF( ARR_REF( transKeys ), string(keys) + "" ) + " ) " ) << "; \n" + " else\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << + "[" << vCS() << "]" << ";\n"; + + if ( !limitLow || !limitHigh ) { + out << + " }\n" + " else {\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << "[" << vCS() << "]" << ";\n" + " }\n" + "\n"; + } + } + + +} + +void FlatBreak::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + out << + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc 
<< " = 0;\n"; + + out << + " switch ( " << ARR_REF( transCondSpaces ) << "[" << trans << "] ) {\n" + "\n"; + + for ( CondSpaceList::Iter csi = red->condSpaceList; csi.lte(); csi++ ) { + GenCondSpace *condSpace = csi; + if ( condSpace->numTransRefs > 0 ) { + out << " " << CASE( STR(condSpace->condSpaceId) ) << " {\n"; + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = (1 << csi.pos()); + out << " ) " << cpc << " += " << condValOffset << ";\n"; + } + + out << + " " << CEND() << "\n}\n"; + } + } + + out << + " }\n" + " " << cond << " += " << CAST( UINT() ) << "" << cpc << ";\n"; + } +} diff --git a/ragel/flatbreak.h b/ragel/flatbreak.h new file mode 100644 index 00000000..23400000 --- /dev/null +++ b/ragel/flatbreak.h @@ -0,0 +1,72 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAGEL_FLATBREAK_H +#define RAGEL_FLATBREAK_H + +#include "flat.h" +#include "actloop.h" +#include "actexp.h" + +struct FlatBreak +: + public Flat, public TabBreak +{ + FlatBreak( const CodeGenArgs &args, Flat::Type type ) + : + Tables( args ), + Flat( args, type ), + TabBreak( args ) + {} + + void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class FlatBreakLoop + : public FlatBreak, public ActLoop +{ +public: + FlatBreakLoop( const CodeGenArgs &args ) + : + Tables( args ), + FlatBreak( args, Flat::Loop ), + ActLoop( args ) + {} +}; + +/* + * FlatBreakExp + */ +class FlatBreakExp + : public FlatBreak, public ActExp +{ +public: + FlatBreakExp( const CodeGenArgs &args ) + : + Tables( args ), + FlatBreak( args, Flat::Exp ), + ActExp( args ) + {} +}; + +#endif diff --git a/ragel/flatgoto.cc b/ragel/flatgoto.cc new file mode 100644 index 00000000..2a0e9a21 --- /dev/null +++ b/ragel/flatgoto.cc @@ -0,0 +1,118 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "flatgoto.h" + +void FlatGoto::LOCATE_TRANS() +{ + if ( redFsm->classMap == 0 ) { + out << + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << "[" << vCS() << "]" << ";\n"; + } + else { + long lowKey = redFsm->lowKey.getVal(); + long highKey = redFsm->highKey.getVal(); + + bool limitLow = keyOps->eq( lowKey, keyOps->minKey ); + bool limitHigh = keyOps->eq( highKey, keyOps->maxKey ); + + out << + " " << keys << " = " << OFFSET( ARR_REF( transKeys ), "(" + vCS() + "<<1)" ) << ";\n" + " " << inds << " = " << OFFSET( ARR_REF( indicies ), + ARR_REF( flatIndexOffset ) + "[" + vCS() + "]" ) << ";\n" + "\n"; + + if ( !limitLow || !limitHigh ) { + out << " if ( "; + + if ( !limitHigh ) + out << GET_KEY() << " <= " << highKey; + + if ( !limitHigh && !limitLow ) + out << " && "; + + if ( !limitLow ) + out << GET_KEY() << " >= " << lowKey; + + out << " ) {\n"; + } + + out << + " " << ic << " = " << CAST("int") << ARR_REF( charClass ) << "[" << CAST("int") << GET_KEY() << + " - " << lowKey << "];\n" + " if ( " << ic << " <= " << CAST("int") << DEREF( ARR_REF( transKeys ), string(keys) + "+1" ) << " && " << + "" << ic << " >= " << CAST("int") << DEREF( ARR_REF( transKeys ), string(keys) + "" ) << " )\n" + " " << trans << " = " << CAST(UINT()) << DEREF( ARR_REF( indicies ), + string(inds) + " + " + CAST("int") + "( " + string(ic) + " - " + CAST("int") + + DEREF( ARR_REF( transKeys ), string(keys) + "" ) + " ) " ) << "; \n" + " else\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << + "[" << vCS() << "]" << ";\n"; + + if ( !limitLow || !limitHigh ) { + out << + " }\n" + " else {\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << 
"[" << vCS() << "]" << ";\n" + " }\n" + "\n"; + } + } + +} + + +void FlatGoto::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + out << + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + out << + " switch ( " << ARR_REF( transCondSpaces ) << "[" << trans << "] ) {\n" + "\n"; + + for ( CondSpaceList::Iter csi = red->condSpaceList; csi.lte(); csi++ ) { + GenCondSpace *condSpace = csi; + if ( condSpace->numTransRefs > 0 ) { + out << " " << CASE( STR(condSpace->condSpaceId) ) << " {\n"; + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = (1 << csi.pos()); + out << " ) " << cpc << " += " << condValOffset << ";\n"; + } + + out << + " " << CEND() << "\n}\n"; + } + } + + out << + " }\n" + " " << cond << " += " << CAST( UINT() ) << "" << cpc << ";\n"; + } +} diff --git a/ragel/flatgoto.h b/ragel/flatgoto.h new file mode 100644 index 00000000..e21b6cd9 --- /dev/null +++ b/ragel/flatgoto.h @@ -0,0 +1,72 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAGEL_FLATGOTO_H +#define RAGEL_FLATGOTO_H + +#include "flat.h" +#include "actloop.h" +#include "actexp.h" + +struct FlatGoto +: + public Flat, public TabGoto +{ + FlatGoto( const CodeGenArgs &args, Flat::Type type ) + : + Tables( args ), + Flat( args, type ), + TabGoto( args ) + {} + + void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class FlatGotoLoop + : public FlatGoto, public ActLoop +{ +public: + FlatGotoLoop( const CodeGenArgs &args ) + : + Tables( args ), + FlatGoto( args, Flat::Loop ), + ActLoop( args ) + {} +}; + +/* + * FlatGotoExp + */ +class FlatGotoExp + : public FlatGoto, public ActExp +{ +public: + FlatGotoExp( const CodeGenArgs &args ) + : + Tables( args ), + FlatGoto( args, Flat::Exp ), + ActExp( args ) + {} +}; + +#endif diff --git a/ragel/flatvar.cc b/ragel/flatvar.cc new file mode 100644 index 00000000..37ea6be5 --- /dev/null +++ b/ragel/flatvar.cc @@ -0,0 +1,119 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "flatvar.h" + +#include "parsedata.h" +#include "inputdata.h" + +void FlatVar::LOCATE_TRANS() +{ + if ( redFsm->classMap == 0 ) { + out << + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << "[" << vCS() << "]" << ";\n"; + } + else { + long lowKey = redFsm->lowKey.getVal(); + long highKey = redFsm->highKey.getVal(); + + bool limitLow = keyOps->eq( lowKey, keyOps->minKey ); + bool limitHigh = keyOps->eq( highKey, keyOps->maxKey ); + + out << + " " << keys << " = " << OFFSET( ARR_REF( transKeys ), "(" + vCS() + "<<1)" ) << ";\n" + " " << inds << " = " << OFFSET( ARR_REF( indicies ), + ARR_REF( flatIndexOffset ) + "[" + vCS() + "]" ) << ";\n" + "\n"; + + if ( !limitLow || !limitHigh ) { + out << " if ( "; + + if ( !limitHigh ) + out << GET_KEY() << " <= " << highKey; + + if ( !limitHigh && !limitLow ) + out << " && "; + + if ( !limitLow ) + out << GET_KEY() << " >= " << lowKey; + + out << " ) {\n"; + } + + out << + " " << ic << " = " << CAST("int") << ARR_REF( charClass ) << "[" << CAST("int") << GET_KEY() << + " - " << lowKey << "];\n" + " if ( " << ic << " <= " << CAST("int") << DEREF( ARR_REF( transKeys ), string(keys) + "+1" ) << " && " << + "" << ic << " >= " << CAST("int") << DEREF( ARR_REF( transKeys ), string(keys) + "" ) << " )\n" + " " << trans << " = " << CAST(UINT()) << DEREF( ARR_REF( indicies ), + string(inds) + " + " + CAST("int") + "( " + string(ic) + " - " + CAST("int") + + DEREF( ARR_REF( transKeys ), string(keys) + "" ) + " ) " ) 
<< "; \n" + " else\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << + "[" << vCS() << "]" << ";\n"; + + if ( !limitLow || !limitHigh ) { + out << + " }\n" + " else {\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << "[" << vCS() << "]" << ";\n" + " }\n" + "\n"; + } + } +} + +void FlatVar::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + out << + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + out << + " switch ( " << ARR_REF( transCondSpaces ) << "[" << trans << "] ) {\n" + "\n"; + + for ( CondSpaceList::Iter csi = red->condSpaceList; csi.lte(); csi++ ) { + GenCondSpace *condSpace = csi; + if ( condSpace->numTransRefs > 0 ) { + out << " " << CASE( STR(condSpace->condSpaceId) ) << " {\n"; + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = (1 << csi.pos()); + out << " ) " << cpc << " += " << condValOffset << ";\n"; + } + + out << + " " << CEND() << "\n}\n"; + } + } + + out << + " }\n" + " " << cond << " += " << CAST( UINT() ) << "" << cpc << ";\n"; + } +} diff --git a/ragel/flatvar.h b/ragel/flatvar.h new file mode 100644 index 00000000..9cd80eab --- /dev/null +++ b/ragel/flatvar.h @@ -0,0 +1,70 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial 
portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAGEL_FLATVAR_H +#define RAGEL_FLATVAR_H + +#include "flat.h" +#include "actloop.h" +#include "actexp.h" + +struct FlatVar +: + public Flat, public TabVar +{ + FlatVar( const CodeGenArgs &args, Flat::Type type ) + : + Tables( args ), + Flat( args, type ), + TabVar( args ) + {} + + void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class FlatVarLoop + : public FlatVar, public ActLoop +{ +public: + FlatVarLoop( const CodeGenArgs &args ) + : + Tables( args ), + FlatVar( args, Flat::Loop ), + ActLoop( args ) + {} +}; + +class FlatVarExp +: + public FlatVar, public ActExp +{ +public: + FlatVarExp( const CodeGenArgs &args ) + : + Tables( args ), + FlatVar( args, Flat::Exp ), + ActExp( args ) + {} +}; + +#endif diff --git a/ragel/fsmap.cc b/ragel/fsmap.cc new file mode 100644 index 00000000..c310f923 --- /dev/null +++ b/ragel/fsmap.cc @@ -0,0 +1,1198 @@ +/* + * Copyright 2002-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included 
in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "fsmgraph.h" +#include <iostream> +using std::endl; + +/* Insert an action into an action table. */ +void ActionTable::setAction( int ordering, Action *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. */ + insertMulti( ordering, action ); +} + +/* Set all the action from another action table in this table. */ +void ActionTable::setActions( const ActionTable &other ) +{ + for ( ActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} + +void ActionTable::setActions( int *orderings, Action **actions, int nActs ) +{ + for ( int a = 0; a < nActs; a++ ) + insertMulti( orderings[a], actions[a] ); +} + +bool ActionTable::hasAction( Action *action ) +{ + for ( int a = 0; a < length(); a++ ) { + if ( data[a].value == action ) + return true; + } + return false; +} + +/* Insert an action into an action table. */ +void LmActionTable::setAction( int ordering, LongestMatchPart *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. */ + insertMulti( ordering, action ); +} + +/* Set all the action from another action table in this table. 
*/ +void LmActionTable::setActions( const LmActionTable &other ) +{ + for ( LmActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} + +/* Insert an error action, recording its ordering and transfer point. */ +void ErrActionTable::setAction( int ordering, Action *action, int transferPoint ) +{ + insertMulti( ErrActionTableEl( action, ordering, transferPoint ) ); +} + +/* Insert a copy of every element of another error action table. */ +void ErrActionTable::setActions( const ErrActionTable &other ) +{ + for ( ErrActionTable::Iter act = other; act.lte(); act++ ) + insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) ); +} + +/* Insert a priority into this priority table. Looks out for priorities on + * duplicate keys. */ +void PriorTable::setPrior( int ordering, PriorDesc *desc ) +{ + PriorEl *lastHit = 0; + PriorEl *insed = insert( PriorEl(ordering, desc), &lastHit ); + if ( insed == 0 ) { + /* This already has a priority on the same key as desc. Overwrite the + * priority if the new ordering is greater than or equal to the + * existing one (set at the same time or later). */ + if ( ordering >= lastHit->ordering ) + *lastHit = PriorEl( ordering, desc ); + } +} + +/* Set all the priorities from a priorTable in this table. */ +void PriorTable::setPriors( const PriorTable &other ) +{ + /* Loop src priorities once to overwrite duplicates. */ + PriorTable::Iter priorIt = other; + for ( ; priorIt.lte(); priorIt++ ) + setPrior( priorIt->ordering, priorIt->desc ); +} + +/* Set the priority of starting transitions. Isolates the start state so it has + * no other entry points, then sets the priorities of all the transitions out + * of the start state. If the start state is final, then the outPrior of the + * start state is also set. The idea is that a machine that accepts the null + * string can still specify the starting trans prior for when it accepts the + * null word. */ +void FsmAp::startFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + /* Walk all transitions out of the start state. 
*/ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + trans->tdap()->priorTable.setPrior( ordering, prior ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + cond->priorTable.setPrior( ordering, prior ); + } + } + } + + /* Also set the priority on the NFA transitions out of the start state. */ + if ( startState->nfaOut != 0 ) { + for ( NfaTransList::Iter na = *startState->nfaOut; na.lte(); na++ ) + na->priorTable.setPrior( ordering, prior ); + } + + /* If the new start state is final then set the out priority. This follows + * the same convention as setting start action in the out action table of + * a final start state. */ + if ( startState->stateBits & STB_ISFINAL ) + startState->outPriorTable.setPrior( ordering, prior ); + + /* Start fsm priorities are a special case that may require + * minimization afterwards. */ + afterOpMinimize( this ); +} + +/* Set the priority of all transitions in a graph. Walks the out list and the + * NFA out transitions of every state. Plain and condition transitions that + * have no target state (toState == 0) are skipped. */ +void FsmAp::allTransPrior( int ordering, PriorDesc *prior ) +{ + /* Walk the list of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + trans->tdap()->priorTable.setPrior( ordering, prior ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + cond->priorTable.setPrior( ordering, prior ); + } + } + } + + /* Walk the NFA out transitions of the state, if it has any. */ + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter na = *state->nfaOut; na.lte(); na++ ) + na->priorTable.setPrior( ordering, prior ); + } + } +} + +/* Set the priority of all transitions that go into a final state. 
Note that if + * any entry states are final, we will not be setting the priority of any + * transitions that may go into those states in the future. The graph does not + * support pending in transitions in the same way pending out transitions are + * supported. */ +void FsmAp::finishFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk all in transitions of the final state: plain transitions + * first, then condition transitions. */ + for ( TransInList::Iter t = (*state)->inTrans; t.lte(); t++ ) + t->priorTable.setPrior( ordering, prior ); + for ( CondInList::Iter t = (*state)->inCond; t.lte(); t++ ) + t->priorTable.setPrior( ordering, prior ); + + /* Then any NFA transitions into the final state. */ + if ( (*state)->nfaIn != 0 ) { + for ( NfaInList::Iter na = *(*state)->nfaIn; na.lte(); na++ ) + na->priorTable.setPrior( ordering, prior ); + } + } +} + +/* Set the priority of any future out transitions that may be made going out of + * this state machine. */ +void FsmAp::leaveFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Set priority in the out priority table of all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outPriorTable.setPrior( ordering, prior ); +} + + +/* Set actions to execute on starting transitions. Isolates the start state + * so it has no other entry points, then adds to the transition functions + * of all the transitions out of the start state. If the start state is final, + * then the func is also added to the start state's out func list. The idea is + * that a machine that accepts the null string can execute a start func when it + * matches the null word, which can only be done when leaving the start/final + * state. */ +void FsmAp::startFsmAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + /* Walk the start state's transitions, setting functions. 
*/ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + trans->tdap()->actionTable.setAction( ordering, action ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + cond->actionTable.setAction( ordering, action ); + } + } + } + + /* If start state is final then add the action to the out action table. + * This means that when the null string is accepted the start action will + * not be bypassed. */ + if ( startState->stateBits & STB_ISFINAL ) + startState->outActionTable.setAction( ordering, action ); + + /* Repeat the same embedding one level down, on the target state of + * every NFA transition out of the start state. */ + if ( startState->nfaOut != 0 ) { + for ( NfaTransList::Iter na = *startState->nfaOut; na.lte(); na++ ) { + + StateAp *state = na->toState; + + /* Walk the NFA target state's transitions, setting functions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + trans->tdap()->actionTable.setAction( ordering, action ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + cond->actionTable.setAction( ordering, action ); + } + } + } + + /* If the NFA target state is final then add the action to its out + * action table, for the same reason as for a final start state + * above. */ + if ( state->stateBits & STB_ISFINAL ) + state->outActionTable.setAction( ordering, action ); + + } + } + + afterOpMinimize( this ); +} + +/* Set functions to execute on all transitions. Walks the out lists of all + * states. */ +void FsmAp::allTransAction( int ordering, Action *action ) +{ + /* Walk all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + /* Transitions without a target state are left alone. */ + if ( trans->tdap()->toState != 0 ) + trans->tdap()->actionTable.setAction( ordering, action ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + cond->actionTable.setAction( ordering, action ); + } + } + } + } +} + +/* Specify functions to execute upon entering final states. If the start state + * is final we can't really specify a function to execute upon entering that + * final state the first time. So function really means whenever entering a + * final state from within the same fsm. */ +void FsmAp::finishFsmAction( int ordering, Action *action ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk the final state's in lists: plain transitions, then + * condition transitions. */ + for ( TransInList::Iter t = (*state)->inTrans; t.lte(); t++ ) + t->actionTable.setAction( ordering, action ); + for ( CondInList::Iter t = (*state)->inCond; t.lte(); t++ ) + t->actionTable.setAction( ordering, action ); + } +} + +/* Add functions to any future out transitions that may be made going out of + * this state machine. */ +void FsmAp::leaveFsmAction( int ordering, Action *action ) +{ + /* Insert the action in the outActionTable of all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outActionTable.setAction( ordering, action ); +} + +/* Add functions to the longest match action table for constructing scanners. */ +void FsmAp::longMatchAction( int ordering, LongestMatchPart *lmPart ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk the final state's in list. 
*/ + for ( TransInList::Iter t = (*state)->inTrans; t.lte(); t++ ) + t->lmActionTable.setAction( ordering, lmPart ); + for ( CondInList::Iter t = (*state)->inCond; t.lte(); t++ ) + t->lmActionTable.setAction( ordering, lmPart ); + } +} + +/* Make the out list of a state cover the whole key range from + * ctx->keyOps->minKey to ctx->keyOps->maxKey. Every uncovered range gets a new + * transition attached with a null (0) target state. The condition lists of + * non-plain transitions are then completed in the same way. */ +void FsmAp::fillGaps( StateAp *state ) +{ + /* + * First pass fills in the gaps between transitions. + */ + if ( state->outList.length() == 0 ) { + /* Add the range on the lower and upper bound. */ + attachNewTrans( state, 0, ctx->keyOps->minKey, ctx->keyOps->maxKey ); + } + else { + TransList srcList; + srcList.transfer( state->outList ); + + /* Check for a gap at the beginning. */ + TransList::Iter trans = srcList, next; + if ( ctx->keyOps->lt( ctx->keyOps->minKey, trans->lowKey ) ) { + /* Make the high key and append. */ + Key highKey = trans->lowKey; + ctx->keyOps->decrement( highKey ); + + attachNewTrans( state, 0, ctx->keyOps->minKey, highKey ); + } + + /* Put the first source transition back on the out list. The next + * pointer is taken before the append. */ + next = trans.next(); + state->outList.append( trans ); + + /* Keep the last high end. */ + Key lastHigh = trans->highKey; + + /* Loop each source range. */ + for ( trans = next; trans.lte(); trans = next ) { + /* Make the next key following the last range. */ + Key nextKey = lastHigh; + ctx->keyOps->increment( nextKey ); + + /* Check for a gap from last up to here. */ + if ( ctx->keyOps->lt( nextKey, trans->lowKey ) ) { + /* Make the high end of the range that fills the gap. */ + Key highKey = trans->lowKey; + ctx->keyOps->decrement( highKey ); + + attachNewTrans( state, 0, nextKey, highKey ); + } + + /* Put the source transition back on the out list, again taking + * the next pointer before the append. */ + next = trans.next(); + state->outList.append( trans ); + + /* Keep the last high end. */ + lastHigh = trans->highKey; + } + + /* Now check for a gap on the end to fill. */ + if ( ctx->keyOps->lt( lastHigh, ctx->keyOps->maxKey ) ) { + /* Get a copy of the default. 
*/ + ctx->keyOps->increment( lastHigh ); + + attachNewTrans( state, 0, lastHigh, ctx->keyOps->maxKey ); + } + } + + /* + * Second pass fills in gaps in condition lists. + */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) + continue; + + CondList srcList; + srcList.transfer( trans->tcap()->condList ); + + /* NOTE(review): cond is dereferenced without an emptiness check, so + * this presumably relies on every non-plain transition having at + * least one condition -- confirm. */ + CondList::Iter cond = srcList, next; + + /* Check for gap at the beginning. */ + if ( cond->key > 0 ) { + for ( CondKey key = 0; key < cond->key; key.increment() ) + attachNewCond( trans, state, 0, key ); + } + + next = cond.next(); + trans->tcap()->condList.append( cond ); + + CondKey lastKey = cond->key; + + for ( cond = next; cond.lte(); cond = next ) { + /* Make the next key following the last range. */ + CondKey nextKey = lastKey; + nextKey.increment(); + + /* Check for a gap from last up to here. */ + if ( nextKey < cond->key ) { + for ( CondKey key = nextKey; key < cond->key; key.increment() ) + attachNewCond( trans, state, 0, key ); + } + + next = cond.next(); + trans->tcap()->condList.append( cond ); + + lastKey = cond->key; + } + + /* Exclusive upper bound on condition keys: two to the power of the + * number of conditions in the cond space, or zero when there is no + * cond space. */ + CondKey high = (trans->condSpace == 0) ? + 0 : (1 << trans->condSpace->condSet.length()); + + /* Now check for a gap on the end to fill. */ + if ( lastKey < high ) { + /* Step past the last key present, then fill up to, but not + * including, high. */ + lastKey.increment(); + + for ( CondKey key = lastKey; key < high; key.increment() ) + attachNewCond( trans, state, 0, key ); + } + } +} + +/* Fill the gaps in the out list of a state, then add every action in other to + * each transition that has no target state. */ +void FsmAp::setErrorActions( StateAp *state, const ActionTable &other ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set error transitions in the transitions that go to error. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState == 0 ) + trans->tdap()->actionTable.setActions( other ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState == 0 ) + cond->actionTable.setActions( other ); + } + } + } +} + +/* Fill the gaps in the out list of a state, then add a single action with the + * given ordering to each transition that has no target state. */ +void FsmAp::setErrorAction( StateAp *state, int ordering, Action *action ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set the error action in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState == 0 ) + trans->tdap()->actionTable.setAction( ordering, action ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState == 0 ) + cond->actionTable.setAction( ordering, action ); + } + } + } +} + + +/* Give a target state for error transitions. After the gaps are filled, every + * transition without a target state is redirected to target and receives the + * nActs actions given by the parallel orderings and actions arrays. */ +void FsmAp::setErrorTarget( StateAp *state, StateAp *target, int *orderings, + Action **actions, int nActs ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set error target in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState == 0 ) { + /* The trans goes to error, redirect it. */ + redirectErrorTrans( trans->tdap()->fromState, target, trans->tdap() ); + trans->tdap()->actionTable.setActions( orderings, actions, nActs ); + } + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState == 0 ) { + /* The trans goes to error, redirect it. 
*/ + redirectErrorTrans( cond->fromState, target, cond ); + cond->actionTable.setActions( orderings, actions, nActs ); + } + } + } + } +} + +/* Copy every out action of a state into its EOF action table, then empty the + * out action table. */ +void FsmAp::transferOutActions( StateAp *state ) +{ + for ( ActionTable::Iter act = state->outActionTable; act.lte(); act++ ) + state->eofActionTable.setAction( act->key, act->value ); + state->outActionTable.empty(); +} + +/* Apply and remove the pending error actions of a state that carry the given + * transfer point. Each one is set on the state's error transitions and, when + * the state is not final, also added to its EOF action table. */ +void FsmAp::transferErrorActions( StateAp *state, int transferPoint ) +{ + /* The index only advances when nothing is removed, since vremove is + * called on the current position. */ + for ( int i = 0; i < state->errActionTable.length(); ) { + ErrActionTableEl *act = state->errActionTable.data + i; + if ( act->transferPoint == transferPoint ) { + /* Transfer the error action and remove it. */ + setErrorAction( state, act->ordering, act->action ); + if ( ! state->isFinState() ) + state->eofActionTable.setAction( act->ordering, act->action ); + state->errActionTable.vremove( i ); + } + else { + /* Not transferring and deleting, skip over the item. */ + i += 1; + } + } +} + +/* Set error actions in the start state. */ +void FsmAp::startErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + /* Add the actions. */ + startState->errActionTable.setAction( ordering, action, transferPoint ); + + afterOpMinimize( this ); +} + +/* Set error actions in all states. */ +void FsmAp::allErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Insert actions in the error action table of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->errActionTable.setAction( ordering, action, transferPoint ); +} + +/* Set error actions in final states. */ +void FsmAp::finalErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->errActionTable.setAction( ordering, action, transferPoint ); +} + +/* Set error actions in every state except the start state. */ +void FsmAp::notStartErrorAction( int ordering, Action *action, int transferPoint ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set error actions in every state that is not final. */ +void FsmAp::notFinalErrorAction( int ordering, Action *action, int transferPoint ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set error actions in the states that are neither the start state nor final. */ +void FsmAp::middleErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Set the action in all states that are not the start state and not + * final. NOTE(review): the start state is not isolated here, so a start + * state that is reachable from inside the machine is simply skipped. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set EOF actions in the start state. */ +void FsmAp::startEOFAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + /* Add the actions. */ + startState->eofActionTable.setAction( ordering, action ); + + afterOpMinimize( this ); +} + +/* Set EOF actions in all states. */ +void FsmAp::allEOFAction( int ordering, Action *action ) +{ + /* Insert actions in the EOF action table of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->eofActionTable.setAction( ordering, action ); +} + +/* Set EOF actions in final states. */ +void FsmAp::finalEOFAction( int ordering, Action *action ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->eofActionTable.setAction( ordering, action ); +} + +/* Set EOF actions in every state except the start state. */ +void FsmAp::notStartEOFAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* Set EOF actions in every state that is not final. */ +void FsmAp::notFinalEOFAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* Set EOF actions in the states that are neither the start state nor final. */ +void FsmAp::middleEOFAction( int ordering, Action *action ) +{ + /* Set the actions in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* + * Set To State Actions. + */ + +/* Set to state actions in the start state. */ +void FsmAp::startToStateAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + startState->toStateActionTable.setAction( ordering, action ); + + afterOpMinimize( this ); +} + +/* Set to state actions in all states. */ +void FsmAp::allToStateAction( int ordering, Action *action ) +{ + /* Insert the action on all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->toStateActionTable.setAction( ordering, action ); +} + +/* Set to state actions in final states. */ +void FsmAp::finalToStateAction( int ordering, Action *action ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->toStateActionTable.setAction( ordering, action ); +} + +/* Set to state actions in every state except the start state. */ +void FsmAp::notStartToStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* Set to state actions in every state that is not final. */ +void FsmAp::notFinalToStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* Set to state actions in states that are not final and not the start state. */ +void FsmAp::middleToStateAction( int ordering, Action *action ) +{ + /* Set the action in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* + * Set From State Actions. + */ + +/* Set from state actions in the start state. */ +void FsmAp::startFromStateAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + startState->fromStateActionTable.setAction( ordering, action ); + + afterOpMinimize( this ); +} + +/* Set from state actions in all states. */ +void FsmAp::allFromStateAction( int ordering, Action *action ) +{ + /* Insert the action on all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->fromStateActionTable.setAction( ordering, action ); +} + +/* Set from state actions in final states. */ +void FsmAp::finalFromStateAction( int ordering, Action *action ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->fromStateActionTable.setAction( ordering, action ); +} + +/* Set from state actions in every state except the start state. */ +void FsmAp::notStartFromStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +/* Set from state actions in every state that is not final. */ +void FsmAp::notFinalFromStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +/* Set from state actions in states that are not final and not the start + * state. */ +void FsmAp::middleFromStateAction( int ordering, Action *action ) +{ + /* Set the action in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +/* Shift the function ordering of the start transitions to start + * at fromOrder and increase in units of 1. Useful before staring. + * Returns the maximum number of order numbers used. */ +int FsmAp::shiftStartActionOrder( int fromOrder ) +{ + int maxUsed = 0; + + /* Walk the start state's transitions, shifting function ordering. */ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + /* Renumber this transition's actions, restarting at fromOrder. */ + int curFromOrder = fromOrder; + ActionTable::Iter action = trans->tdap()->actionTable; + for ( ; action.lte(); action++ ) + action->key = curFromOrder++; + + /* Keep track of the max number of orders used. */ + if ( curFromOrder - fromOrder > maxUsed ) + maxUsed = curFromOrder - fromOrder; + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + /* Walk the function data for the transition and set the keys to + * increasing values starting at fromOrder. 
*/ + int curFromOrder = fromOrder; + ActionTable::Iter action = cond->actionTable; + for ( ; action.lte(); action++ ) + action->key = curFromOrder++; + + /* Keep track of the max number of orders used. */ + if ( curFromOrder - fromOrder > maxUsed ) + maxUsed = curFromOrder - fromOrder; + } + } + } + + return maxUsed; +} + +/* Remove all priorities. */ +void FsmAp::clearAllPriorities() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Clear out priority data. */ + state->outPriorTable.empty(); + + /* Clear transition data from the out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) + trans->tdap()->priorTable.empty(); + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) + cond->priorTable.empty(); + } + } + + /* Clear the priorities of NFA transitions. These are reached through + * the in list of each state rather than the out list. */ + if ( state->nfaIn != 0 ) { + for ( NfaInList::Iter na = *state->nfaIn; na.lte(); na++ ) + na->priorTable.empty(); + } + } +} + +/* Zeros out the function ordering keys. This may be called before minimization + * when it is known that no more fsm operations are going to be done. This + * will achieve greater reduction as states will not be separated on the basis + * of function ordering. */ +void FsmAp::nullActionKeys( ) +{ + /* For each state... */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the transitions for the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + /* Walk the action table for the transition. */ + for ( ActionTable::Iter action = trans->tdap()->actionTable; + action.lte(); action++ ) + action->key = 0; + + /* Walk the longest match action table for the transition. */ + for ( LmActionTable::Iter action = trans->tdap()->lmActionTable; + action.lte(); action++ ) + action->key = 0; + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + /* Walk the action table for the transition. 
*/ + for ( ActionTable::Iter action = cond->actionTable; + action.lte(); action++ ) + action->key = 0; + + /* Walk the longest match action table for the transition. */ + for ( LmActionTable::Iter action = cond->lmActionTable; + action.lte(); action++ ) + action->key = 0; + } + } + } + + /* Null the action keys of the to state action table. */ + for ( ActionTable::Iter action = state->toStateActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the from state action table. */ + for ( ActionTable::Iter action = state->fromStateActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the out transitions. */ + for ( ActionTable::Iter action = state->outActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the orderings of the error action table. Its elements keep the + * ordering in a field of their own rather than in a key. */ + for ( ErrActionTable::Iter action = state->errActionTable; + action.lte(); action++ ) + action->ordering = 0; + + /* Null the action keys of the eof action table. */ + for ( ActionTable::Iter action = state->eofActionTable; + action.lte(); action++ ) + action->key = 0; + } +} + +/* Walk the list of states and verify that non final states do not have out + * data, that all stateBits are cleared, and that there are no states with + * zero foreign in transitions. The checks are made with assert. */ +void FsmAp::verifyStates() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Non final states should not have leaving data. */ + if ( ! (state->stateBits & STB_ISFINAL) ) { + assert( state->outActionTable.length() == 0 ); + assert( state->outCondSpace == 0 ); + assert( state->outCondKeys.length() == 0 ); + assert( state->outPriorTable.length() == 0 ); + } + + /* Data used in algorithms should be cleared. */ + assert( (state->stateBits & STB_BOTH) == 0 ); + assert( state->foreignInTrans > 0 ); + } +} + +/* Compare two transitions according to their relative priority. Since the + * base transition has no priority associated with it, the default is to + * return equal. 
*/ +int FsmAp::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ) +{ + /* Looking for differing priorities on same keys. Need to concurrently + * scan the priority lists. */ + PriorTable::Iter pd1 = priorTable1; + PriorTable::Iter pd2 = priorTable2; + while ( pd1.lte() && pd2.lte() ) { + /* Check keys. Advance whichever side has the smaller key. */ + if ( pd1->desc->key < pd2->desc->key ) + pd1.increment(); + else if ( pd1->desc->key > pd2->desc->key ) + pd2.increment(); + /* Keys are the same, check priorities. */ + else if ( pd1->desc->priority < pd2->desc->priority ) { + /* Record the first guarded interaction seen, when checking for + * them is enabled. */ + if ( ctx->checkPriorInteraction && pd1->desc->guarded ) { + if ( ! priorInteraction ) { + priorInteraction = true; + guardId = pd1->desc->guardId; + } + } + return -1; + } + else if ( pd1->desc->priority > pd2->desc->priority ) { + /* NOTE(review): this branch also reads the guarded flag and + * guardId from pd1 and never from pd2 -- confirm that is + * intended. */ + if ( ctx->checkPriorInteraction && pd1->desc->guarded ) { + if ( ! priorInteraction ) { + priorInteraction = true; + guardId = pd1->desc->guardId; + } + } + return 1; + } + else { + /* Keys and priorities are equal, advance both. */ + pd1.increment(); + pd2.increment(); + } + } + + /* No differing priorities on the same key. 
*/ + return 0; +} + +/* Compare two condition lists pairwise using a ValPairIter. Each pairing is + * handed to compareCondBitElimPtr, with a null pointer standing in for the + * side that has no element. Returns the first non-zero result, or zero when + * every pairing compares equal. */ +int FsmAp::compareCondListBitElim( const CondList &condList1, const CondList &condList2 ) +{ + ValPairIter< PiList<CondAp> > outPair( condList1, condList2 ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + /* Only the first list has an element here. */ + case ValPairIter<CondAp>::RangeInS1: { + int compareRes = FsmAp::compareCondBitElimPtr<CondAp>( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + } + /* Only the second list has an element here. */ + case ValPairIter<CondAp>::RangeInS2: { + int compareRes = FsmAp::compareCondBitElimPtr<CondAp>( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + } + /* Both lists have an element here. */ + case ValPairIter<CondAp>::RangeOverlap: { + int compareRes = FsmAp::compareCondBitElimPtr<CondAp>( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + }} + } + return 0; +} + +/* Compares two transitions according to priority and functions. Pointers + * should not be null. Does not consider to state or from state. Compare two + * transitions according to the data contained in the transitions. Data means + * any properties added to user transitions that may differentiate them. Since + * the base transition has no data, the default is to return equal. 
*/ +int FsmAp::compareTransData( TransAp *trans1, TransAp *trans2 ) +{ + /* Order by cond space pointer first. */ + if ( trans1->condSpace < trans2->condSpace ) + return -1; + else if ( trans2->condSpace < trans1->condSpace ) + return 1; + + /* NOTE(review): only trans1->plain() is tested. Equal cond space pointers + * presumably imply that trans2 is of the same kind -- confirm. */ + if ( trans1->plain() ) { + int compareRes = FsmAp::compareCondDataPtr( trans1->tdap(), trans2->tdap() ); + if ( compareRes != 0 ) + return compareRes; + } + else { + /* Compare the condition lists pairwise. A null pointer stands in for + * the side that has no element. */ + ValPairIter< PiList<CondAp> > outPair( trans1->tcap()->condList, + trans2->tcap()->condList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + case ValPairIter<CondAp>::RangeInS1: { + int compareRes = FsmAp::compareCondDataPtr<CondAp>( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + } + case ValPairIter<CondAp>::RangeInS2: { + int compareRes = FsmAp::compareCondDataPtr<CondAp>( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + } + case ValPairIter<CondAp>::RangeOverlap: { + int compareRes = FsmAp::compareCondDataPtr<CondAp>( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + }} + } + } + return 0; +} + +/* Compares the data of two transitions: the priority table first, then the + * longest match action table, then the action table. The first difference + * found decides the result. Pointers should not be null. Does not consider to + * state or from state. */ +template< class Trans > int FsmAp::compareCondData( Trans *trans1, Trans *trans2 ) +{ + /* Compare the prior table. */ + int cmpRes = CmpPriorTable::compare( trans1->priorTable, + trans2->priorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare longest match action tables. */ + cmpRes = CmpLmActionTable::compare(trans1->lmActionTable, + trans2->lmActionTable); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare action tables. 
*/ + return CmpActionTable::compare(trans1->actionTable, + trans2->actionTable); +} + +/* Compares two transitions according to priority and functions. Pointers + * should not be null. Does not consider to state or from state. Compare two + * transitions according to the data contained in the transitions. Data means + * any properties added to user transitions that may differentiate them. Since + * the base transition has no data, the default is to return equal. */ +template< class Trans > int FsmAp::compareCondBitElim( Trans *trans1, Trans *trans2 ) +{ + if ( trans1->toState < trans2->toState ) + return -1; + else if ( trans1->toState > trans2->toState ) + return 1; + + /* Compare the prior table. */ + int cmpRes = CmpPriorTable::compare( trans1->priorTable, + trans2->priorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare longest match action tables. */ + cmpRes = CmpLmActionTable::compare(trans1->lmActionTable, + trans2->lmActionTable); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare action tables. */ + return CmpActionTable::compare(trans1->actionTable, + trans2->actionTable); +} + +/* Compare the properties of states that are embedded by users. Compares out + * priorities, out transitions, to, from, out, error and eof action tables. */ +int FsmAp::compareStateData( const StateAp *state1, const StateAp *state2 ) +{ + /* Compare the out priority table. */ + int cmpRes = CmpPriorTable:: + compare( state1->outPriorTable, state2->outPriorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test to state action tables. */ + cmpRes = CmpActionTable::compare( state1->toStateActionTable, + state2->toStateActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test from state action tables. */ + cmpRes = CmpActionTable::compare( state1->fromStateActionTable, + state2->fromStateActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out action tables. 
*/ + cmpRes = CmpActionTable::compare( state1->outActionTable, + state2->outActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Out condition space and set of vals. */ + if ( state1->outCondSpace < state2->outCondSpace ) + return -1; + else if ( state1->outCondSpace > state2->outCondSpace ) + return 1; + + cmpRes = CmpTable<int>::compare( state1->outCondKeys, + state2->outCondKeys ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out error action tables. */ + cmpRes = CmpErrActionTable::compare( state1->errActionTable, + state2->errActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test eof action tables. */ + cmpRes = CmpActionTable::compare( state1->eofActionTable, + state2->eofActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + return CmpTable<LongestMatchPart*>::compare( + state1->lmNfaParts, state2->lmNfaParts ); +} + + +/* Invoked when a state looses its final state status and the leaving + * transition embedding data should be deleted. */ +void FsmAp::clearOutData( StateAp *state ) +{ + /* Kill the out actions and priorities. */ + state->outCondSpace = 0; + state->outCondKeys.empty(); + state->outActionTable.empty(); + state->outPriorTable.empty(); +} + +bool FsmAp::hasOutData( StateAp *state ) +{ + return ( state->outActionTable.length() > 0 || + state->outCondSpace != 0 || + state->outCondKeys.length() > 0 || + state->outPriorTable.length() > 0 || + state->outCondSpace != 0 ); +} + +/* + * Setting Conditions. + */ + +FsmRes FsmAp::startFsmCondition( Action *condAction, bool sense ) +{ + CondSet set; + CondKeySet vals; + set.insert( condAction ); + vals.append( sense ? 1 : 0 ); + + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + FsmRes res = embedCondition( this, startState, set, vals ); + if ( !res.success() ) + return res; + + if ( startState->nfaOut != 0 ) { + /* Only one level. 
*/ + for ( NfaTransList::Iter na = *startState->nfaOut; na.lte(); na++ ) { + res = embedCondition( this, startState, set, vals ); + if ( !res.success() ) + return res; + } + } + + afterOpMinimize( this ); + + return FsmRes( FsmRes::Fsm(), this ); +} + +void FsmAp::allTransCondition( Action *condAction, bool sense ) +{ + CondSet set; + CondKeySet vals; + set.insert( condAction ); + vals.append( sense ? 1 : 0 ); + + for ( StateList::Iter state = stateList; state.lte(); state++ ) + embedCondition( this, state, set, vals ); +} + +void FsmAp::leaveFsmCondition( Action *condAction, bool sense ) +{ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + addOutCondition( *state, condAction, sense ); +} diff --git a/ragel/fsmattach.cc b/ragel/fsmattach.cc new file mode 100644 index 00000000..4ed289c4 --- /dev/null +++ b/ragel/fsmattach.cc @@ -0,0 +1,855 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <string.h> +#include <assert.h> +#include "fsmgraph.h" + +#include <iostream> +using namespace std; + +void FsmAp::attachStateDict( StateAp *from, StateAp *to ) +{ + if ( to->stateDictIn == 0 ) + to->stateDictIn = new StateSet; + + bool inserted = to->stateDictIn->insert( from ); + assert( inserted ); + + if ( from != to ) { + if ( misfitAccounting ) { + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + to->foreignInTrans += 1; + } +} + +void FsmAp::detachStateDict( StateAp *from, StateAp *to ) +{ + bool removed = to->stateDictIn->remove( from ); + assert( removed ); + + to->foreignInTrans -= 1; + + if ( from != to ) { + if ( misfitAccounting ) { + if ( to->foreignInTrans == 0 ) + misfitList.append( stateList.detach( to ) ); + } + } +} + +void FsmAp::attachToNfa( StateAp *from, StateAp *to, NfaTrans *nfaTrans ) +{ + if ( to->nfaIn == 0 ) + to->nfaIn = new NfaInList; + + nfaTrans->fromState = from; + nfaTrans->toState = to; + + attachToInList( from, to, to->nfaIn->head, nfaTrans ); +} + +void FsmAp::detachFromNfa( StateAp *from, StateAp *to, NfaTrans *nfaTrans ) +{ + nfaTrans->fromState = 0; + nfaTrans->toState = 0; + + detachFromInList( from, to, to->nfaIn->head, nfaTrans ); +} + +template< class Head > void FsmAp::attachToInList( StateAp *from, + StateAp *to, Head *&head, Head *trans ) +{ + trans->ilnext = head; + trans->ilprev = 0; + + /* If in trans list is not empty, set the head->prev to trans. */ + if ( head != 0 ) + head->ilprev = trans; + + /* Now insert ourselves at the front of the list. */ + head = trans; + + /* Keep track of foreign transitions for from and to. 
*/ + if ( from != to ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * move it from the misfit list to the main list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + to->foreignInTrans += 1; + } +}; + +/* Detach a transition from an inlist. The head of the inlist must be supplied. */ +template< class Head > void FsmAp::detachFromInList( StateAp *from, StateAp *to, + Head *&head, Head *trans ) +{ + if ( trans->ilprev == 0 ) + head = trans->ilnext; + else + trans->ilprev->ilnext = trans->ilnext; + + if ( trans->ilnext != 0 ) + trans->ilnext->ilprev = trans->ilprev; + + /* Keep track of foreign transitions for from and to. */ + if ( from != to ) { + to->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions goes down to 0 then move it + * from the main list to the misfit list. */ + if ( to->foreignInTrans == 0 ) + misfitList.append( stateList.detach( to ) ); + } + } +} + +CondAp *FsmAp::attachNewCond( TransAp *trans, StateAp *from, StateAp *to, CondKey onChar ) +{ + /* Sub-transition for conditions. */ + CondAp *condAp = new CondAp( trans ); + condAp->key = onChar; + trans->tcap()->condList.append( condAp ); + + condAp->fromState = from; + condAp->toState = to; + + /* Attach in list. */ + if ( to != 0 ) + attachToInList( from, to, to->inCond.head, condAp ); + + return condAp; +} + +TransAp *FsmAp::attachNewTrans( StateAp *from, StateAp *to, Key lowKey, Key highKey ) +{ + /* Make the new transition. */ + TransDataAp *retVal = new TransDataAp(); + + /* Make the entry in the out list for the transitions. */ + from->outList.append( retVal ); + + /* Set the the keys of the new trans. */ + retVal->lowKey = lowKey; + retVal->highKey = highKey; + + retVal->fromState = from; + retVal->toState = to; + + /* Attach in list. 
*/ + if ( to != 0 ) + attachToInList( from, to, to->inTrans.head, retVal ); + + return retVal; +} + +/* Attach for range lists or for the default transition. This attach should + * be used when a transition already is allocated and must be attached to a + * target state. Does not handle adding the transition into the out list. */ +void FsmAp::attachTrans( StateAp *from, StateAp *to, TransDataAp *trans ) +{ + assert( trans->fromState == 0 && trans->toState == 0 ); + + trans->fromState = from; + trans->toState = to; + + if ( to != 0 ) { + /* For now always attache the one and only condList element. */ + attachToInList( from, to, to->inTrans.head, trans ); + } +} + +void FsmAp::attachTrans( StateAp *from, StateAp *to, CondAp *trans ) +{ + assert( trans->fromState == 0 && trans->toState == 0 ); + + trans->fromState = from; + trans->toState = to; + + if ( to != 0 ) { + /* For now always attache the one and only condList element. */ + attachToInList( from, to, to->inCond.head, trans ); + } +} + +/* Redirect a transition away from error and towards some state. This is just + * like attachTrans except it requires fromState to be set and does not touch + * it. */ +void FsmAp::redirectErrorTrans( StateAp *from, StateAp *to, TransDataAp *trans ) +{ + assert( trans->fromState != 0 && trans->toState == 0 ); + trans->toState = to; + + if ( to != 0 ) { + /* Attach using the inList pointer as the head pointer. */ + attachToInList( from, to, to->inTrans.head, trans ); + } +} + +void FsmAp::redirectErrorTrans( StateAp *from, StateAp *to, CondAp *trans ) +{ + assert( trans->fromState != 0 && trans->toState == 0 ); + trans->toState = to; + + if ( to != 0 ) { + /* Attach using the inList pointer as the head pointer. */ + attachToInList( from, to, to->inCond.head, trans ); + } +} + +/* Detach for out/in lists or for default transition. 
*/ +void FsmAp::detachTrans( StateAp *from, StateAp *to, TransDataAp *trans ) +{ + assert( trans->fromState == from && trans->toState == to ); + + trans->fromState = 0; + trans->toState = 0; + + if ( to != 0 ) { + detachFromInList( from, to, to->inTrans.head, trans ); + } +} + +void FsmAp::detachTrans( StateAp *from, StateAp *to, CondAp *trans ) +{ + assert( trans->fromState == from && trans->toState == to ); + + trans->fromState = 0; + trans->toState = 0; + + if ( to != 0 ) { + detachFromInList( from, to, to->inCond.head, trans ); + } +} + + +/* Detach a state from the graph. Detaches and deletes transitions in and out + * of the state. Empties inList and outList. Removes the state from the final + * state set. A detached state becomes useless and should be deleted. */ +void FsmAp::detachState( StateAp *state ) +{ + while ( state->inTrans.head != 0 ) { + /* Get pointers to the trans and the state. */ + TransDataAp *trans = state->inTrans.head; + + StateAp *fromState = trans->fromState; + + /* Detach the transitions from the source state. */ + detachTrans( fromState, state, trans ); + fromState->outList.detach( trans ); + delete trans->tdap(); + } + + /* Detach the in transitions from the inList list of transitions. */ + while ( state->inCond.head != 0 ) { + /* Get pointers to the trans and the state. */ + CondAp *condAp = state->inCond.head; + TransAp *trans = condAp->transAp; + + StateAp *fromState = condAp->fromState; + + /* Detach the transitions from the source state. */ + detachTrans( fromState, state, condAp ); + + trans->tcap()->condList.detach( condAp ); + delete condAp; + + if ( trans->tcap()->condList.length() == 0 ) { + /* Ok to delete the transition. */ + fromState->outList.detach( trans ); + delete trans->tcap(); + } + } + + /* Remove the entry points in on the machine. */ + while ( state->entryIds.length() > 0 ) + unsetEntry( state->entryIds[0], state ); + + /* Detach out range transitions. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); ) { + TransList::Iter next = trans.next(); + if ( trans->plain() ) { + detachTrans( state, trans->tdap()->toState, trans->tdap() ); + delete trans->tdap(); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); ) { + CondList::Iter next = cond.next(); + detachTrans( state, cond->toState, cond ); + delete cond; + cond = next; + } + trans->tcap()->condList.abandon(); + delete trans->tcap(); + } + trans = next; + } + + /* Delete all of the out range pointers. */ + state->outList.abandon(); + + /* Unset final stateness before detaching from graph. */ + if ( state->stateBits & STB_ISFINAL ) + finStateSet.remove( state ); + + if ( state->nfaIn != 0 ) { + while ( state->nfaIn->head != 0 ) { + NfaTrans *trans = state->nfaIn->head; + StateAp *fromState = trans->fromState; + + detachFromNfa( fromState, state, trans ); + fromState->nfaOut->detach( trans ); + delete trans; + } + delete state->nfaIn; + state->nfaIn = 0; + } + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter t = *state->nfaOut; t.lte(); ) { + NfaTransList::Iter next = t.next(); + detachFromNfa( t->fromState, t->toState, t ); + state->nfaOut->detach( t ); + delete t; + t = next; + } + state->nfaOut->abandon(); + delete state->nfaOut; + state->nfaOut = 0; + } + + if ( state->stateDictIn != 0 ) { + for ( StateSet::Iter s = *state->stateDictIn; s.lte(); s++ ) { + bool removed = (*s)->stateDictEl->stateSet.remove( state ); + assert( removed ); + } + + delete state->stateDictIn; + state->stateDictIn = 0; + } + + if ( state->stateDictEl != 0 ) { + for ( StateSet::Iter s = state->stateDictEl->stateSet; s.lte(); s++ ) + detachStateDict( state, *s ); + + stateDict.detach( state->stateDictEl ); + delete state->stateDictEl; + state->stateDictEl = 0; + + nfaList.detach( state ); + } +} + +TransDataAp *FsmAp::dupTransData( StateAp *from, TransDataAp *srcTrans ) +{ + /* Make a new transition. 
*/ + TransDataAp *newTrans = new TransDataAp(); + newTrans->condSpace = srcTrans->condSpace; + + attachTrans( from, srcTrans->tdap()->toState, newTrans ); + addInTrans( newTrans, srcTrans->tdap() ); + + return newTrans; +} + + +/* Duplicate a transition. Makes a new transition that is attached to the same + * dest as srcTrans. The new transition has functions and priority taken from + * srcTrans. Used for merging a transition in to a free spot. The trans can + * just be dropped in. It does not conflict with an existing trans and need + * not be crossed. Returns the new transition. */ +TransAp *FsmAp::dupTrans( StateAp *from, TransAp *srcTrans ) +{ + if ( srcTrans->plain() ) { + /* Make a new transition. */ + TransDataAp *newTrans = new TransDataAp(); + newTrans->condSpace = srcTrans->condSpace; + + attachTrans( from, srcTrans->tdap()->toState, newTrans ); + addInTrans( newTrans, srcTrans->tdap() ); + + return newTrans; + } + else { + /* Make a new transition. */ + TransAp *newTrans = new TransCondAp(); + newTrans->condSpace = srcTrans->condSpace; + + for ( CondList::Iter sc = srcTrans->tcap()->condList; sc.lte(); sc++ ) { + /* Sub-transition for conditions. */ + CondAp *newCond = new CondAp( newTrans ); + newCond->key = sc->key; + newTrans->tcap()->condList.append( newCond ); + + /* We can attach the transition, one does not exist. */ + attachTrans( from, sc->toState, newCond ); + + /* Call the user callback to add in the original source transition. */ + addInTrans( newCond, sc.ptr ); + } + + return newTrans; + } +} + +/* Duplicate a transition. Makes a new transition that is attached to the same + * dest as srcTrans. The new transition has functions and priority taken from + * srcTrans. Used for merging a transition in to a free spot. The trans can + * just be dropped in. It does not conflict with an existing trans and need + * not be crossed. Returns the new transition. 
*/ +CondAp *FsmAp::dupCondTrans( StateAp *from, TransAp *destParent, CondAp *srcTrans ) +{ + /* Sub-transition for conditions. */ + CondAp *newCond = new CondAp( destParent ); + + /* We can attach the transition, one does not exist. */ + attachTrans( from, srcTrans->toState, newCond ); + + /* Call the user callback to add in the original source transition. */ + addInTrans( newCond, srcTrans ); + + return newCond; +} + +/* In crossing, src trans and dest trans both go to existing states. Make one + * state from the sets of states that src and dest trans go to. */ +template< class Trans > Trans *FsmAp::fsmAttachStates( StateAp *from, + Trans *destTrans, Trans *srcTrans ) +{ + /* The priorities are equal. We must merge the transitions. Does the + * existing trans go to the state we are to attach to? ie, are we to + * simply double up the transition? */ + StateAp *toState = srcTrans->toState; + StateAp *existingState = destTrans->toState; + + if ( existingState == toState ) { + /* The transition is a double up to the same state. Copy the src + * trans into itself. We don't need to merge in the from out trans + * data, that was done already. */ + addInTrans( destTrans, srcTrans ); + } + else { + /* The trans is not a double up. Dest trans cannot be the same as src + * trans. Set up the state set. */ + StateSet stateSet; + + /* We go to all the states the existing trans goes to, plus... */ + if ( existingState->stateDictEl == 0 ) + stateSet.insert( existingState ); + else + stateSet.insert( existingState->stateDictEl->stateSet ); + + /* ... all the states that we have been told to go to. */ + if ( toState->stateDictEl == 0 ) + stateSet.insert( toState ); + else + stateSet.insert( toState->stateDictEl->stateSet ); + + /* Look for the state. If it is not there already, make it. */ + StateDictEl *lastFound; + if ( stateDict.insert( stateSet, &lastFound ) ) { + /* Make a new state representing the combination of states in + * stateSet. It gets added to the fill list. 
This means that we + * need to fill in it's transitions sometime in the future. We + * don't do that now (ie, do not recurse). */ + StateAp *combinState = addState(); + + /* Link up the dict element and the state. */ + lastFound->targState = combinState; + combinState->stateDictEl = lastFound; + + /* Setup the in links. */ + for ( StateSet::Iter s = stateSet; s.lte(); s++ ) + attachStateDict( combinState, *s ); + + /* Add to the fill list. */ + nfaList.append( combinState ); + } + + /* Get the state insertted/deleted. */ + StateAp *targ = lastFound->targState; + + /* Detach the state from existing state. */ + detachTrans( from, existingState, destTrans ); + + /* Re-attach to the new target. */ + attachTrans( from, targ, destTrans ); + + /* Add in src trans to the existing transition that we redirected to + * the new state. We don't need to merge in the from out trans data, + * that was done already. */ + addInTrans( destTrans, srcTrans ); + } + + return destTrans; +} + +/* Two transitions are to be crossed, handle the possibility of either going + * to the error state. */ +template < class Trans > Trans *FsmAp::mergeTrans( StateAp *from, + Trans *destTrans, Trans *srcTrans ) +{ + Trans *retTrans = 0; + if ( destTrans->toState == 0 && srcTrans->toState == 0 ) { + /* Error added into error. */ + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else if ( destTrans->toState == 0 && srcTrans->toState != 0 ) { + /* Non error added into error we need to detach and reattach, */ + detachTrans( from, destTrans->toState, destTrans ); + attachTrans( from, srcTrans->toState, destTrans ); + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else if ( srcTrans->toState == 0 ) { + /* Dest goes somewhere but src doesn't, just add it it in. */ + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else { + /* Both go somewhere, run the actual cross. 
*/ + retTrans = fsmAttachStates( from, destTrans, srcTrans ); + } + + return retTrans; +} + +/* Find the trans with the higher priority. If src is lower priority then dest then + * src is ignored. If src is higher priority than dest, then src overwrites dest. If + * the priorities are equal, then they are merged. */ +CondAp *FsmAp::crossCondTransitions( StateAp *from, TransAp *destParent, + CondAp *destTrans, CondAp *srcTrans ) +{ + CondAp *retTrans; + + /* Compare the priority of the dest and src transitions. */ + int compareRes = comparePrior( destTrans->priorTable, srcTrans->priorTable ); + if ( compareRes < 0 ) { + /* Src trans has a higher priority than dest, src overwrites dest. + * Detach dest and return a copy of src. */ + detachTrans( from, destTrans->toState, destTrans ); + delete destTrans; + retTrans = dupCondTrans( from, destParent, srcTrans ); + } + else if ( compareRes > 0 ) { + /* The dest trans has a higher priority, use dest. */ + retTrans = destTrans; + } + else { + /* Src trans and dest trans have the same priority, they must be merged. */ + retTrans = mergeTrans( from, destTrans, srcTrans ); + } + + /* Return the transition that resulted from the cross. */ + return retTrans; +} + +TransAp *FsmAp::copyTransForExpansion( StateAp *from, TransAp *srcTrans ) +{ + /* This is the dup without the attach. */ + TransCondAp *newTrans = new TransCondAp(); + newTrans->condSpace = srcTrans->condSpace; + + if ( srcTrans->plain() ) { + TransDataAp *srcData = srcTrans->tdap(); + CondAp *newCond = new CondAp( newTrans ); + newCond->key = 0; + + attachTrans( srcData->fromState, srcData->toState, newCond ); + + /* Call the user callback to add in the original source transition. */ + //addInTrans( newCond, srcData ); + + /* Not a copy of ourself, get the functions and priorities. 
*/ + newCond->lmActionTable.setActions( srcData->lmActionTable ); + newCond->actionTable.setActions( srcData->actionTable ); + newCond->priorTable.setPriors( srcData->priorTable ); + + newTrans->condList.append( newCond ); + } + else { + for ( CondList::Iter sc = srcTrans->tcap()->condList; sc.lte(); sc++ ) { + /* Sub-transition for conditions. */ + CondAp *newCond = new CondAp( newTrans ); + newCond->key = sc->key; + + attachTrans( sc->fromState, sc->toState, newCond ); + + /* Call the user callback to add in the original source transition. */ + addInTrans( newCond, sc.ptr ); + + newTrans->condList.append( newCond ); + } + } + + /* Set up the transition's keys and append to the dest list. */ + newTrans->lowKey = srcTrans->lowKey; + newTrans->highKey = srcTrans->highKey; + + return newTrans; +} + +void FsmAp::freeEffectiveTrans( TransAp *trans ) +{ + for ( CondList::Iter sc = trans->tcap()->condList; sc.lte(); ) { + CondList::Iter next = sc.next(); + detachTrans( sc->fromState, sc->toState, sc ); + delete sc; + sc = next; + } + trans->tcap()->condList.abandon(); + delete trans->tcap(); +} + +TransDataAp *FsmAp::crossTransitionsBothPlain( StateAp *from, + TransDataAp *destTrans, TransDataAp *srcTrans ) +{ + /* Neither have cond space and no expansion took place. Cross them. */ + TransDataAp *retTrans; + + /* Compare the priority of the dest and src transitions. */ + int compareRes = comparePrior( destTrans->priorTable, srcTrans->priorTable ); + if ( compareRes < 0 ) { + /* Src trans has a higher priority than dest, src overwrites dest. + * Detach dest and return a copy of src. */ + detachTrans( from, destTrans->toState, destTrans ); + delete destTrans; + retTrans = dupTransData( from, srcTrans ); + } + else if ( compareRes > 0 ) { + /* The dest trans has a higher priority, use dest. */ + retTrans = destTrans; + } + else { + /* Src trans and dest trans have the same priority, they must be merged. 
*/ + retTrans = mergeTrans( from, destTrans, srcTrans ); + } + + /* Return the transition that resulted from the cross. */ + return retTrans; +} + +/* Find the trans with the higher priority. If src is lower priority then dest then + * src is ignored. If src is higher priority than dest, then src overwrites dest. If + * the priorities are equal, then they are merged. */ +TransAp *FsmAp::crossTransitions( StateAp *from, + TransAp *destTrans, TransAp *srcTrans ) +{ + if ( destTrans->plain() && srcTrans->plain() ) { + /* Return the transition that resulted from the cross. */ + return crossTransitionsBothPlain( from, + destTrans->tdap(), srcTrans->tdap() ); + } + else { + /* At least one is non-empty. Target is non-empty. Need to work in + * condition spaced. */ + CondSpace *mergedSpace = expandCondSpace( destTrans, srcTrans ); + + /* If the dest state cond space does not equal the merged, we have to + * rewrite it. If the src state cond space does not equal, we have to + * copy it. */ + + TransAp *effSrcTrans = srcTrans; + + if ( srcTrans->condSpace != mergedSpace ) { + effSrcTrans = copyTransForExpansion( from, srcTrans ); + CondSpace *orig = effSrcTrans->condSpace; + effSrcTrans->condSpace = mergedSpace; + expandConds( from, effSrcTrans, orig, mergedSpace ); + } + + if ( destTrans->condSpace != mergedSpace ) { + /* Make the transition into a conds transition. If dest is a plain + * transition, we have to replace it with a conds transition. */ + if ( destTrans->plain() ) + destTrans = convertToCondAp( from, destTrans->tdap() ); + + /* Now expand the dest. */ + CondSpace *orig = destTrans->condSpace; + destTrans->condSpace = mergedSpace; + expandConds( from, destTrans, orig, mergedSpace ); + } + + /* The destination list. */ + CondList destList; + + /* Set up an iterator to stop at breaks. 
*/ + ValPairIter< PiList<CondAp> > outPair( destTrans->tcap()->condList, + effSrcTrans->tcap()->condList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + case ValPairIter<CondAp>::RangeInS1: { + /* The pair iter is the authority on the keys. It may have needed + * to break the dest range. */ + CondAp *destCond = outPair.s1Tel.trans; + destCond->key = outPair.s1Tel.key; + destList.append( destCond ); + break; + } + case ValPairIter<CondAp>::RangeInS2: { + /* Src range may get crossed with dest's default transition. */ + CondAp *newCond = dupCondTrans( from, destTrans, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newCond->key = outPair.s2Tel.key; + destList.append( newCond ); + break; + } + case ValPairIter<CondAp>::RangeOverlap: { + /* Exact overlap, cross them. */ + CondAp *newTrans = crossCondTransitions( from, destTrans, + outPair.s1Tel.trans, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newTrans->key = outPair.s1Tel.key; + destList.append( newTrans ); + break; + }} + } + + /* Abandon the old outList and transfer destList into it. */ + destTrans->tcap()->condList.transfer( destList ); + + /* Delete the duplicate. Don't detach anything. */ + if ( srcTrans != effSrcTrans ) + freeEffectiveTrans( effSrcTrans ); + + return destTrans; + } +} + +/* Copy the transitions in srcList to the outlist of dest. The srcList should + * not be the outList of dest, otherwise you would be copying the contents of + * srcList into itself as it's iterated: bad news. */ +void FsmAp::outTransCopy( StateAp *dest, TransAp *srcList ) +{ + /* The destination list. */ + TransList destList; + + /* Set up an iterator to stop at breaks. 
*/ + RangePairIter< PiList<TransAp> > outPair( ctx, dest->outList, srcList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + case RangePairIter<TransAp>::RangeInS1: { + /* The pair iter is the authority on the keys. It may have needed + * to break the dest range. */ + TransAp *destTrans = outPair.s1Tel.trans; + destTrans->lowKey = outPair.s1Tel.lowKey; + destTrans->highKey = outPair.s1Tel.highKey; + destList.append( destTrans ); + break; + } + case RangePairIter<TransAp>::RangeInS2: { + /* Src range may get crossed with dest's default transition. */ + TransAp *newTrans = dupTrans( dest, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newTrans->lowKey = outPair.s2Tel.lowKey; + newTrans->highKey = outPair.s2Tel.highKey; + destList.append( newTrans ); + break; + } + case RangePairIter<TransAp>::RangeOverlap: { + /* Exact overlap, cross them. */ + TransAp *newTrans = crossTransitions( dest, + outPair.s1Tel.trans, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newTrans->lowKey = outPair.s1Tel.lowKey; + newTrans->highKey = outPair.s1Tel.highKey; + destList.append( newTrans ); + break; + } + case RangePairIter<TransAp>::BreakS1: { + /* Since we are always writing to the dest trans, the dest needs + * to be copied when it is broken. The copy goes into the first + * half of the break to "break it off". */ + outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans ); + break; + } + case RangePairIter<TransAp>::BreakS2: + break; + } + } + + /* Abandon the old outList and transfer destList into it. */ + dest->outList.transfer( destList ); +} + +/* Move all the transitions that go into src so that they go into dest. */ +void FsmAp::moveInwardTrans( StateAp *dest, StateAp *src ) +{ + /* Do not try to move in trans to and from the same state. */ + assert( dest != src ); + + /* If src is the start state, dest becomes the start state. 
*/ + if ( src == startState ) { + unsetStartState(); + setStartState( dest ); + } + + /* For each entry point into, create an entry point into dest, when the + * state is detached, the entry points to src will be removed. */ + for ( EntryIdSet::Iter enId = src->entryIds; enId.lte(); enId++ ) + changeEntry( *enId, dest, src ); + + /* Move the transitions in inList. */ + while ( src->inTrans.head != 0 ) { + /* Get trans and from state. */ + TransDataAp *trans = src->inTrans.head; + StateAp *fromState = trans->fromState; + + /* Detach from src, reattach to dest. */ + detachTrans( fromState, src, trans ); + attachTrans( fromState, dest, trans ); + } + + /* Move the transitions in inList. */ + while ( src->inCond.head != 0 ) { + /* Get trans and from state. */ + CondAp *trans = src->inCond.head; + StateAp *fromState = trans->fromState; + + /* Detach from src, reattach to dest. */ + detachTrans( fromState, src, trans ); + attachTrans( fromState, dest, trans ); + } + + /* Move inward nfa links. 
*/ + if ( src->nfaIn != 0 ) { + while ( src->nfaIn->head != 0 ) { + NfaTrans *trans = src->nfaIn->head; + StateAp *fromState = trans->fromState; + + detachFromNfa( fromState, src, trans ); + attachToNfa( fromState, dest, trans ); + } + } +} diff --git a/ragel/fsmbase.cc b/ragel/fsmbase.cc new file mode 100644 index 00000000..bdf40279 --- /dev/null +++ b/ragel/fsmbase.cc @@ -0,0 +1,853 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "fsmgraph.h" +#include "parsedata.h" + +#include <string.h> +#include <assert.h> +#include <iostream> + +/* Build a context from the global settings. Copies minimizeLevel, minimizeOpt, printStatistics and checkPriorInteraction out of fsmGbl, keeps the fsmGbl pointer, starts every counter, flag and inline-expression pointer at zero/false, and allocates the KeyOps and CondData objects. */ FsmCtx::FsmCtx( FsmGbl *fsmGbl ) +: + minimizeLevel(fsmGbl->minimizeLevel), + minimizeOpt(fsmGbl->minimizeOpt), + + /* No limit. 
*/ + stateLimit(STATE_UNLIMITED), + + printStatistics(fsmGbl->printStatistics), + + checkPriorInteraction(fsmGbl->checkPriorInteraction), + + unionOp(false), + + condsCheckDepth(0), + + curActionOrd(0), + curPriorOrd(0), + + nextPriorKey(0), + nextCondId(0), + + fsmGbl(fsmGbl), + generatingSectionSubset(false), + lmRequiresErrorState(false), + nameIndex(0), + + getKeyExpr(0), + accessExpr(0), + prePushExpr(0), + postPopExpr(0), + nfaPrePushExpr(0), + nfaPostPopExpr(0), + pExpr(0), + peExpr(0), + eofExpr(0), + csExpr(0), + topExpr(0), + stackExpr(0), + actExpr(0), + tokstartExpr(0), + tokendExpr(0), + dataExpr(0) +{ + keyOps = new KeyOps; + condData = new CondData; +} + +/* Deletes the KeyOps and CondData allocated by the constructor, calls empty() on priorDescList and actionList, and deletes every inline expression pointer that is non-null. */ FsmCtx::~FsmCtx() +{ + delete keyOps; + delete condData; + priorDescList.empty(); + + actionList.empty(); + + if ( getKeyExpr != 0 ) + delete getKeyExpr; + if ( accessExpr != 0 ) + delete accessExpr; + if ( prePushExpr != 0 ) + delete prePushExpr; + if ( postPopExpr != 0 ) + delete postPopExpr; + if ( nfaPrePushExpr != 0 ) + delete nfaPrePushExpr; + if ( nfaPostPopExpr != 0 ) + delete nfaPostPopExpr; + if ( pExpr != 0 ) + delete pExpr; + if ( peExpr != 0 ) + delete peExpr; + if ( eofExpr != 0 ) + delete eofExpr; + if ( csExpr != 0 ) + delete csExpr; + if ( topExpr != 0 ) + delete topExpr; + if ( stackExpr != 0 ) + delete stackExpr; + if ( actExpr != 0 ) + delete actExpr; + if ( tokstartExpr != 0 ) + delete tokstartExpr; + if ( tokendExpr != 0 ) + delete tokendExpr; + if ( dataExpr != 0 ) + delete dataExpr; +} + +/* Graph constructor. Makes an empty graph bound to ctx: no start state, no error state. */ +FsmAp::FsmAp( FsmCtx *ctx ) +: + ctx( ctx ), + + priorInteraction(false), + + /* No start state. */ + startState(0), + errState(0), + + /* Misfit accounting is a switch, turned on only at specific times. It + * controls what happens when states have no way in from the outside + * world. */ + misfitAccounting(false) +{ +} + +/* Copy all graph data including transitions. 
*/ +FsmAp::FsmAp( const FsmAp &graph ) +: + ctx( graph.ctx ), + + priorInteraction(false), + + /* Lists start empty. Will be filled by copy. */ + stateList(), + misfitList(), + + /* Copy in the entry points, + * pointers will be resolved later. */ + entryPoints(graph.entryPoints), + startState(graph.startState), + errState(0), + + /* Will be filled by copy. */ + finStateSet(), + + /* Misfit accounting is only on during merging. */ + misfitAccounting(false) +{ + /* Create the states and record their map in the original state. */ + StateList::Iter origState = graph.stateList; + for ( ; origState.lte(); origState++ ) { + /* Make the new state. */ + StateAp *newState = new StateAp( *origState ); + + /* Add the state to the list. */ + stateList.append( newState ); + + /* Record the duplicate in the stateMap item of the old state. */ + origState->alg.stateMap = newState; + } + + /* Dereference all the state maps. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + /* The transition still points to the original in the src machine. The target's duplicate + * is in the stateMap. */ + StateAp *toState = trans->tdap()->toState != 0 ? + trans->tdap()->toState->alg.stateMap : 0; + + /* Attach the transition to the duplicate. The target is cleared + * first and then set again through attachTrans. */ + trans->tdap()->toState = 0; + attachTrans( state, toState, trans->tdap() ); + + } + else { + for ( CondList::Iter cti = trans->tcap()->condList; cti.lte(); cti++ ) { + /* The cond still points to the original in the src machine. The target's duplicate + * is in the stateMap. */ + StateAp *toState = cti->toState != 0 ? cti->toState->alg.stateMap : 0; + + /* Attach the transition to the duplicate. */ + cti->toState = 0; + attachTrans( state, toState, cti ); + } + } + } + + /* Fix the eofTarg, if set. 
*/ + if ( state->eofTarget != 0 ) + state->eofTarget = state->eofTarget->alg.stateMap; + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *state->nfaOut; n.lte(); n++ ) { + StateAp *targ = n->toState->alg.stateMap; + n->toState = 0; + attachToNfa( state, targ, n ); + } + } + } + + /* Fix the state pointers in the entry points array. */ + EntryMapEl *eel = entryPoints.data; + for ( int e = 0; e < entryPoints.length(); e++, eel++ ) { + /* Get the duplicate of the state. */ + eel->value = eel->value->alg.stateMap; + + /* Foreign in transitions must be built up when duping machines so + * increment it here. */ + eel->value->foreignInTrans += 1; + } + + /* Fix the start state pointer and the new start state's count of in + * transitions. NOTE(review): startState is dereferenced unconditionally + * here, so this assumes graph has a start state -- confirm with callers. */ + startState = startState->alg.stateMap; + startState->foreignInTrans += 1; + + /* Build the final state set. */ + StateSet::Iter st = graph.finStateSet; + for ( ; st.lte(); st++ ) + finStateSet.insert((*st)->alg.stateMap); +} + +/* Deletes all transition data then deletes each state. */ +FsmAp::~FsmAp() +{ + /* Delete all the transitions. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Iterate the out transitions, deleting them. The next iterator is + * fetched before the delete. */ + for ( TransList::Iter n, t = state->outList; t.lte(); ) { + n = t.next(); + if ( t->plain() ) + delete t->tdap(); + else + delete t->tcap(); + t = n; + } + state->outList.abandon(); + + if ( state->nfaIn != 0 ) { + delete state->nfaIn; + state->nfaIn = 0; + } + + if ( state->nfaOut != 0 ) { + state->nfaOut->empty(); + delete state->nfaOut; + state->nfaOut = 0; + } + } + + /* Delete all the states. */ + stateList.empty(); +} + +/* Set a state final. The state has its STB_ISFINAL bit set and the state + * is added to the finStateSet. */ +void FsmAp::setFinState( StateAp *state ) +{ + /* Is it already a fin state? */ + if ( state->stateBits & STB_ISFINAL ) + return; + + state->stateBits |= STB_ISFINAL; + finStateSet.insert( state ); +} + +/* Set a state non-final. 
The has its isFinState flag set false and the state + * is removed from the final state set. */ +void FsmAp::unsetFinState( StateAp *state ) +{ + /* Is it already a non-final state? */ + if ( ! (state->stateBits & STB_ISFINAL) ) + return; + + /* When a state loses its final state status it must relinquish all the + * properties that are allowed only for final states. */ + clearOutData( state ); + + state->stateBits &= ~ STB_ISFINAL; + finStateSet.remove( state ); +} + +/* Set and unset a state as the start state. */ +void FsmAp::setStartState( StateAp *state ) +{ + /* Should change from unset to set. */ + assert( startState == 0 ); + startState = state; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the main list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; +} + +/* Clear the start state, undoing the bookkeeping done by setStartState. */ void FsmAp::unsetStartState() +{ + /* Should change from set to unset. */ + assert( startState != 0 ); + + /* Decrement the start state's count of foreign in transitions. */ + startState->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( startState->foreignInTrans == 0 ) + misfitList.append( stateList.detach( startState ) ); + } + + startState = 0; +} + +/* Associate an id with a state. Makes the state a named entry point. Has no + * effect if the entry point is already mapped to the state. */ +void FsmAp::setEntry( int id, StateAp *state ) +{ + /* Insert the id into the state. If the state is already labelled with id, + * nothing to do. */ + if ( state->entryIds.insert( id ) ) { + /* Insert the entry and assert that it succeeds. 
*/ + entryPoints.insertMulti( id, state ); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the main list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; + } +} + +/* Remove the association of an id with a state. The state loses its entry + * point status. Assumes that the id is indeed mapped to state. */ +void FsmAp::unsetEntry( int id, StateAp *state ) +{ + /* Find the entry points registered on id, then scan for the record that + * targets state. The scan is not bounded by enHigh, so it relies on id + * being mapped to state. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != state ) + enLow += 1; + + /* Remove the record from the map. */ + entryPoints.remove( enLow ); + + /* Remove the state's sense of the link. */ + state->entryIds.remove( id ); + state->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( state->foreignInTrans == 0 ) + misfitList.append( stateList.detach( state ) ); + } +} + +/* Remove all association of an id with states. Assumes that the id is indeed + * mapped to a state. */ +void FsmAp::unsetEntry( int id ) +{ + /* Find the entry points registered on id. The loop below treats enHigh + * as inclusive. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + for ( EntryMapEl *mel = enLow; mel <= enHigh; mel++ ) { + /* Remove the state's sense of the link. */ + mel->value->entryIds.remove( id ); + mel->value->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit list. */ + if ( mel->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( mel->value ) ); + } + } + + /* Remove the records from the entry points map. 
*/ + entryPoints.removeMulti( enLow, enHigh ); +} + + +/* Move the entry point id from state from to state to: the map record is retargeted and both states' entryIds and foreign in transition counts are updated. Assumes id is mapped to from; the scan for its record is not bounded by enHigh. */ void FsmAp::changeEntry( int id, StateAp *to, StateAp *from ) +{ + /* Find the entry in the entry map. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != from ) + enLow += 1; + + /* Change it to the new target. */ + enLow->value = to; + + /* Remove from's sense of the link. */ + from->entryIds.remove( id ); + from->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( from->foreignInTrans == 0 ) + misfitList.append( stateList.detach( from ) ); + } + + /* Add to's sense of the link. Nothing is counted if to already carries id. */ + if ( to->entryIds.insert( id ) != 0 ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the main list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + /* Up the foreign in transitions to the state. */ + to->foreignInTrans += 1; + } +} + + +/* Clear all entry points from a machine. */ +void FsmAp::unsetAllEntryPoints() +{ + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) { + /* Kill all the state's entry points at once. A state that appears + * again under another id has an empty entryIds by then and is skipped. */ + if ( en->value->entryIds.length() > 0 ) { + en->value->foreignInTrans -= en->value->entryIds.length(); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit + * list. */ + if ( en->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( en->value ) ); + } + + /* Clear the set of ids out all at once. */ + en->value->entryIds.empty(); + } + } + + /* Now clear out the entry map all at once. */ + entryPoints.empty(); +} + +/* Assigning an epsilon transition into final states. 
*/ +void FsmAp::epsilonTrans( int id ) +{ + for ( StateSet::Iter fs = finStateSet; fs.lte(); fs++ ) + (*fs)->epsilonTrans.append( id ); +} + +/* Mark all states reachable from state. Traverses transitions forward. Used + * for removing states that have no path into them. */ +void FsmAp::markReachableFromHere( StateAp *state ) +{ + /* Base case: already marked, nothing to do. */ + if ( state->stateBits & STB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= STB_ISMARKED; + + /* Recurse on all out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + markReachableFromHere( trans->tdap()->toState ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + markReachableFromHere( cond->toState ); + } + } + } + + /* Recurse on the nfa out targets and on all states that compose us. */ + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter st = *state->nfaOut; st.lte(); st++ ) + markReachableFromHere( st->toState ); + } + + if ( state->stateDictEl != 0 ) { + for ( StateSet::Iter ss = state->stateDictEl->stateSet; ss.lte(); ss++ ) + markReachableFromHere( *ss ); + } +} + +/* Does any out transition (plain or cond) have a target state? */ +bool FsmAp::anyRegularTransitions( StateAp *state ) +{ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + StateAp *toState = trans->tdap()->toState; + if ( toState != 0 ) + return true; + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + StateAp *toState = cond->toState; + if ( toState != 0 ) + return true; + } + } + } + return false; +} + +/* Variant of markReachableFromHere that does not follow plain or cond transitions into final states. */ void FsmAp::markReachableFromHereStopFinal( StateAp *state ) +{ + /* Base case: already marked, nothing to do. */ + if ( state->stateBits & STB_ISMARKED ) + return; + + /* Set this state as processed. 
We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= STB_ISMARKED; + + /* Recurse on all out transitions, skipping targets that are final. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + StateAp *toState = trans->tdap()->toState; + if ( toState != 0 && !toState->isFinState() ) + markReachableFromHereStopFinal( toState ); + + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + StateAp *toState = cond->toState; + if ( toState != 0 && !toState->isFinState() ) + markReachableFromHereStopFinal( toState ); + } + } + } + + /* Recurse on the nfa out targets and on all states that compose us. + * These are followed without the final state test. */ + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter st = *state->nfaOut; st.lte(); st++ ) + markReachableFromHereStopFinal( st->toState ); + } + + if ( state->stateDictEl != 0 ) { + for ( StateSet::Iter ss = state->stateDictEl->stateSet; ss.lte(); ss++ ) + markReachableFromHereStopFinal( *ss ); + } +} + +/* Mark all states reachable from state. Traverse transitions backwards. Used + * for removing dead end paths in graphs. */ +void FsmAp::markReachableFromHereReverse( StateAp *state ) +{ + /* Base case: already marked, nothing to do. */ + if ( state->stateBits & STB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states with + * transitions into this state. */ + state->stateBits |= STB_ISMARKED; + + /* Recurse on the source of every plain and cond in transition. Inward nfa + * links (nfaIn) are not followed here. */ + for ( TransInList::Iter t = state->inTrans; t.lte(); t++ ) + markReachableFromHereReverse( t->fromState ); + for ( CondInList::Iter t = state->inCond; t.lte(); t++ ) + markReachableFromHereReverse( t->fromState ); +} + +/* Determine if there are any entry points into a start state other than the + * start state. Setting starting transitions requires that the start state be + * isolated. In most cases a start state will already be isolated. 
*/ +bool FsmAp::isStartStateIsolated() +{ + /* If there are any in transitions then the state is not isolated. */ + if ( startState->inTrans.head != 0 ) + return false; + if ( startState->inCond.head != 0 ) + return false; + + /* If there are any entry points then the state is not isolated. */ + if ( startState->entryIds.length() > 0 ) + return false; + + return true; +} + +/* Bring in other's entry points. Assumes other's states are going to be + * copied into this machine. Only the map records are added here. */ +void FsmAp::copyInEntryPoints( FsmAp *other ) +{ + /* Use insert multi because names are not unique. */ + for ( EntryMap::Iter en = other->entryPoints; en.lte(); en++ ) + entryPoints.insertMulti( en->key, en->value ); +} + + +/* Clear the STB_ISFINAL bit on every state in the final state set, then empty the set. */ void FsmAp::unsetAllFinStates() +{ + for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) + (*st)->stateBits &= ~ STB_ISFINAL; + finStateSet.empty(); +} + +/* Set the given bits in the stateBits of every final state. */ void FsmAp::setFinBits( int finStateBits ) +{ + for ( int s = 0; s < finStateSet.length(); s++ ) + finStateSet.data[s]->stateBits |= finStateBits; +} + +/* Clear the given bits in the stateBits of every final state. */ void FsmAp::unsetFinBits( int finStateBits ) +{ + for ( int s = 0; s < finStateSet.length(); s++ ) + finStateSet.data[s]->stateBits &= ~ finStateBits; +} + + +/* Tests the integrity of the transition lists and the fromStates. */ +void FsmAp::verifyIntegrity() +{ + int count = 0; + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out transitions and assert fromState is correct. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + assert( trans->tdap()->fromState == state ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + assert( cond->fromState == state ); + } + } + } + + /* Walk the inlist and assert toState is correct. 
*/ + for ( TransInList::Iter t = state->inTrans; t.lte(); t++ ) { + assert( t->toState == state ); + } + for ( CondInList::Iter t = state->inCond; t.lte(); t++ ) { + assert( t->toState == state ); + } + + count += 1; + } + + assert( stateList.length() == count ); +} + +/* Asserts that every state on the state list is reachable from the start state or an entry point. The marks are cleared again on the way out. */ void FsmAp::verifyReachability() +{ + /* Mark all the states that can be reached + * through the start state and the set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Check that everything got marked. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert it got marked and then clear the mark. */ + assert( st->stateBits & STB_ISMARKED ); + st->stateBits &= ~ STB_ISMARKED; + } +} + +/* Asserts that every state on the state list, the start state excepted, is found by walking in transitions backwards from the final states. The marks are cleared again on the way out. */ void FsmAp::verifyNoDeadEndStates() +{ + /* Mark all states that have paths to the final states. */ + for ( StateSet::Iter pst = finStateSet; pst.lte(); pst++ ) + markReachableFromHereReverse( *pst ); + + /* Start state gets honorary marking. Must be done AFTER recursive call. */ + startState->stateBits |= STB_ISMARKED; + + /* Make sure everything got marked. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert the state got marked and unmark it. */ + assert( st->stateBits & STB_ISMARKED ); + st->stateBits &= ~ STB_ISMARKED; + } +} + +/* Append state, then recursively everything it has a transition to, onto stateList. States already flagged STB_ONLIST are skipped. */ void FsmAp::depthFirstOrdering( StateAp *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->stateBits & STB_ONLIST ) + return; + + /* Doing depth first, put state on the list. */ + state->stateBits |= STB_ONLIST; + stateList.append( state ); + + /* Recurse on everything ranges. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + depthFirstOrdering( trans->tdap()->toState ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + depthFirstOrdering( cond->toState ); + } + } + } + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter s = *state->nfaOut; s.lte(); s++ ) + depthFirstOrdering( s->toState ); + } +} + +/* Ordering states by transition connections. */ +void FsmAp::depthFirstOrdering() +{ + /* Clear the on-list flag of every state. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->stateBits &= ~STB_ONLIST; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + /* Add back to the state list from the error state (if there is one), the + * start state and all other entry points, in that order. */ + if ( errState != 0 ) + depthFirstOrdering( errState ); + + depthFirstOrdering( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( en->value ); + + /* Make sure we put everything back on. This holds only if every state + * was reachable from one of the roots above. */ + assert( stateListLen == stateList.length() ); +} + +/* Stable sort the states by final state status. */ +void FsmAp::sortStatesByFinal() +{ + /* Move forward through the list and move final states onto the end. The + * original tail is remembered in last so the walk stops there and does + * not revisit the states it has moved. */ + StateAp *state = 0; + StateAp *next = stateList.head; + StateAp *last = stateList.tail; + while ( state != last ) { + /* Move forward and load up the next. */ + state = next; + next = state->next; + + /* Throw to the end? */ + if ( state->isFinState() ) { + stateList.detach( state ); + stateList.append( state ); + } + } +} + +/* Number the states consecutively in list order, starting at base. */ void FsmAp::setStateNumbers( int base ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->alg.stateNum = base++; +} + +/* True if the cond transition has no target state. The state argument is not used. */ bool FsmAp::checkErrTrans( StateAp *state, CondAp *trans ) +{ + /* Might go directly to error state. 
*/ + if ( trans->toState == 0 ) + return true; + + return false; +} + +/* True if trans can lead to an error: there is a key gap in front of it, it is plain and has no target, or its cond list is shorter than condFullSize() or holds a cond with no target. */ bool FsmAp::checkErrTrans( StateAp *state, TransAp *trans ) +{ + /* + * Look for a gap between this transition and the previous. + */ + if ( trans->prev == 0 ) { + /* If this is the first transition. */ + if ( ctx->keyOps->lt( ctx->keyOps->minKey, trans->lowKey ) ) + return true; + } + else { + /* Not the first transition. Compare against the prev. */ + TransAp *prev = trans->prev; + Key nextKey = prev->highKey; + ctx->keyOps->increment( nextKey ); + if ( ctx->keyOps->lt( nextKey, trans->lowKey ) ) + return true; + } + + if ( trans->plain() ) { + if ( trans->tdap()->toState == 0 ) + return true; + } + else { + /* Check for gaps in the condition list. */ + if ( trans->tcap()->condList.length() < trans->condFullSize() ) + return true; + + /* Check all destinations. */ + for ( CondList::Iter cti = trans->tcap()->condList; cti.lte(); cti++ ) { + if ( checkErrTrans( state, cti ) ) + return true; + } + } + + return false; +} + +/* True if state has no out transitions at all, or if its last out transition stops short of maxKey. */ bool FsmAp::checkErrTransFinish( StateAp *state ) +{ + /* Check if there are any ranges already. */ + if ( state->outList.length() == 0 ) + return true; + else { + /* Get the last and check for a gap on the end. 
*/ + TransAp *last = state->outList.tail; + if ( ctx->keyOps->lt( last->highKey, ctx->keyOps->maxKey ) ) + return true; + } + return 0; /* Converts to false: no gap on the end. */ +} + +/* True if any state on the state list has an error transition according to checkErrTrans or checkErrTransFinish. */ bool FsmAp::hasErrorTrans() +{ + bool result; + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) { + result = checkErrTrans( st, tr ); + if ( result ) + return true; + } + result = checkErrTransFinish( st ); + if ( result ) + return true; + } + return false; +} diff --git a/ragel/fsmcond.cc b/ragel/fsmcond.cc new file mode 100644 index 00000000..b2339c12 --- /dev/null +++ b/ragel/fsmcond.cc @@ -0,0 +1,520 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * Setting conditions and merging states with conditions are similar activities + * when expressed in code. The critical difference is that a merge is a union + * of multiple paths. We have to take both paths. 
Setting a condition, however, + * is a restriction. We have to expand the transition to follow both values of + * the condition, then remove the one that is not set. + */ + +#include "fsmgraph.h" +#include "mergesort.h" +#include "parsedata.h" + +#include <assert.h> +#include <iostream> + +/* Returns condSpace->fullSize(), or 1 when the transition has no cond space. */ long TransAp::condFullSize() + { return condSpace == 0 ? 1 : condSpace->fullSize(); } + +/* Rewrite the keys in condKeys, which are bit sets over the conditions of fromSpace, as bit sets over mergedSpace. Then, for each condition of mergedSpace that is not in fromSpace, add a copy of every key with that condition's bit set. Either space may be null, which is treated as an empty cond set. */ void FsmAp::expandCondKeys( CondKeySet &condKeys, CondSpace *fromSpace, + CondSpace *mergedSpace ) +{ + CondSet fromCS, mergedCS; + + if ( fromSpace != 0 ) + fromCS.insert( fromSpace->condSet ); + + if ( mergedSpace != 0 ) + mergedCS.insert( mergedSpace->condSet ); + + /* Need to transform condition element to the merged set. */ + for ( int cti = 0; cti < condKeys.length(); cti++ ) { + long origVal = condKeys[cti]; + long newVal = 0; + + /* Iterate the bit positions in the from set. */ + for ( CondSet::Iter csi = fromCS; csi.lte(); csi++ ) { + /* If set, find it in the merged set and flip the bit to 1. */ + if ( origVal & (1 << csi.pos()) ) { + /* The condition is set. Find the bit position in the merged + * set. The result of find is not checked, so this assumes + * every condition of the from set is in the merged set. */ + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + newVal |= 1 << bitPos; + } + } + + if ( origVal != newVal ) + condKeys[cti] = newVal; + } + + /* Need to double up the whole transition list for each condition test in + * merged that is not in from. The one we add has the bit in question set. 
+ * */ + for ( CondSet::Iter csi = mergedCS; csi.lte(); csi++ ) { + Action **cim = fromCS.find( *csi ); + if ( cim == 0 ) { + CondKeySet newItems; + newItems.append( condKeys ); + for ( int cti = 0; cti < condKeys.length(); cti++ ) { + int key = condKeys[cti] | (1 << csi.pos()); + newItems.insert( key ); + } + + condKeys.setAs( newItems ); + } + } +} + +/* Cond-list counterpart of expandCondKeys. Rewrites the key of every cond on trans from a bit set over fromSpace to one over mergedSpace. Then, for each condition of mergedSpace that is not in fromSpace, creates a duplicate of every cond with that bit set, attaches it from fromState to the same target, and merges the duplicates into the cond list in key order. */ void FsmAp::expandConds( StateAp *fromState, TransAp *trans, + CondSpace *fromSpace, CondSpace *mergedSpace ) +{ + CondSet fromCS, mergedCS; + + if ( fromSpace != 0 ) + fromCS.insert( fromSpace->condSet ); + + if ( mergedSpace != 0 ) + mergedCS.insert( mergedSpace->condSet ); + + /* Need to transform condition element to the merged set. */ + for ( CondList::Iter cti = trans->tcap()->condList; cti.lte(); cti++ ) { + long origVal = cti->key.getVal(); + long newVal = 0; + + /* Iterate the bit positions in the from set. */ + for ( CondSet::Iter csi = fromCS; csi.lte(); csi++ ) { + /* If set, find it in the merged set and flip the bit to 1. */ + if ( origVal & (1 << csi.pos()) ) { + /* The condition is set. Find the bit position in the merged + * set. The result of find is not checked, so this assumes + * every condition of the from set is in the merged set. */ + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + newVal |= 1 << bitPos; + } + } + + if ( origVal != newVal ) + cti->key = newVal; + } + + /* Need to double up the whole transition list for each condition test in + * merged that is not in from. The one we add has the bit in question set. + * */ + for ( CondSet::Iter csi = mergedCS; csi.lte(); csi++ ) { + Action **cim = fromCS.find( *csi ); + if ( cim == 0 ) { + CondList newItems; + for ( CondList::Iter cti = trans->tcap()->condList; cti.lte(); cti++ ) { + /* Sub-transition for conditions. */ + CondAp *cond = new CondAp( trans ); + + /* Attach only if our caller wants the expanded transitions + * attached. NOTE(review): the attach below is unconditional. */ + attachTrans( fromState, cti->toState, cond ); + + /* Call the user callback to add in the original source transition. 
*/ + addInTrans( cond, cti.ptr ); + + cond->key = cti->key.getVal() | (1 << csi.pos()); + + newItems.append( cond ); + } + + /* Merge newItems in. Both the condList and newItems are sorted. Make + * a sorted list out of them. */ + CondAp *dest = trans->tcap()->condList.head; + while ( dest != 0 && newItems.head != 0 ) { + if ( newItems.head->key.getVal() > dest->key.getVal() ) { + dest = dest->next; + } + else { + /* Pop the item for insertion. */ + CondAp *ins = newItems.detachFirst(); + trans->tcap()->condList.addBefore( dest, ins ); + } + } + + /* Append the rest of the items. */ + trans->tcap()->condList.append( newItems ); + } + } +} + +/* Returns the cond space for the union of the cond sets of destTrans and srcTrans, obtained through addCondSpace. A transition with no cond space contributes nothing. */ CondSpace *FsmAp::expandCondSpace( TransAp *destTrans, TransAp *srcTrans ) +{ + CondSet destCS, srcCS; + CondSet mergedCS; + + if ( destTrans->condSpace != 0 ) + destCS.insert( destTrans->condSpace->condSet ); + + if ( srcTrans->condSpace != 0 ) + srcCS.insert( srcTrans->condSpace->condSet ); + + mergedCS.insert( destCS ); + mergedCS.insert( srcCS ); + + return addCondSpace( mergedCS ); +} + +/* Allocates a new state and copies only srcState's out cond space and out cond keys into it. */ StateAp *FsmAp::copyStateForExpansion( StateAp *srcState ) +{ + StateAp *newState = new StateAp(); + newState->outCondSpace = srcState->outCondSpace; + newState->outCondKeys = srcState->outCondKeys; + return newState; +} + +/* Merge srcState's out conditions into destState. Both key sets are expanded to the merged cond space. The keys are unioned when leaving is false and both states are final, and intersected otherwise. Does nothing when the two states are the same or neither has out conditions. */ void FsmAp::mergeOutConds( StateAp *destState, StateAp *srcState, bool leaving ) +{ + if ( destState == srcState ) + return; + + bool bothFinal = destState->isFinState() && srcState->isFinState(); + bool unionOp = !leaving; + + CondSet destCS, srcCS; + CondSet mergedCS; + + if ( destState->outCondSpace != 0 ) + destCS.insert( destState->outCondSpace->condSet ); + + if ( srcState->outCondSpace != 0 ) + srcCS.insert( srcState->outCondSpace->condSet ); + + mergedCS.insert( destCS ); + mergedCS.insert( srcCS ); + + if ( mergedCS.length() > 0 ) { + CondSpace *mergedSpace = addCondSpace( mergedCS ); + + CondSpace *srcSpace = srcState->outCondSpace; + /* Local copy of the source keys; nothing is assigned back to srcState. */ CondKeySet srcKeys = srcState->outCondKeys; + + if ( srcSpace != mergedSpace 
) { + /* Prep the key list with zero item if necessary. */ + if ( srcSpace == 0 ) + srcKeys.append( 0 ); + + expandCondKeys( srcKeys, srcSpace, mergedSpace ); + } + + if ( destState->outCondSpace != mergedSpace ) { + /* Prep the key list with zero item if necessary. */ + if ( destState->outCondSpace == 0 ) + destState->outCondKeys.append( 0 ); + + /* Now expand the dest. */ + expandCondKeys( destState->outCondKeys, destState->outCondSpace, mergedSpace ); + } + + destState->outCondSpace = mergedSpace; + + if ( unionOp && bothFinal ) { + /* Keys can come from either. */ + for ( CondKeySet::Iter c = srcKeys; c.lte(); c++ ) + destState->outCondKeys.insert( *c ); + } + else { + /* Keys need to be in both sets. The index only advances when + * nothing was removed at it. */ + for ( long c = 0; c < destState->outCondKeys.length(); ) { + if ( !srcKeys.find( destState->outCondKeys[c] ) ) + destState->outCondKeys.CondKeyVect::remove( c, 1 ); + else + c++; + } + } + } +} + +/* Look condSet up in the context's condSpaceMap and return the CondSpace found. On a miss a new CondSpace is created, inserted in the map and returned. */ CondSpace *FsmAp::addCondSpace( const CondSet &condSet ) +{ + CondSpace *condSpace = ctx->condData->condSpaceMap.find( condSet ); + if ( condSpace == 0 ) { + condSpace = new CondSpace( condSet ); + ctx->condData->condSpaceMap.insert( condSpace ); + } + return condSpace; +} + +/* Build a plain TransDataAp from cond: same key range as cond's owning transition, same actions, priorities and target. The new transition is attached, then cond is detached and both cond and its owning transition are deleted. Returns the new transition. */ TransDataAp *FsmAp::convertToTransAp( StateAp *from, CondAp *cond ) +{ + TransDataAp *newTrans = new TransDataAp(); + newTrans->lowKey = cond->transAp->lowKey; + newTrans->highKey = cond->transAp->highKey; + + newTrans->lmActionTable.setActions( cond->lmActionTable ); + newTrans->actionTable.setActions( cond->actionTable ); + newTrans->priorTable.setPriors( cond->priorTable ); + + attachTrans( from, cond->toState, newTrans ); + + /* Detach in list. 
*/ + detachTrans( from, cond->toState, cond ); + delete cond->transAp; + delete cond; + + return newTrans; +} + +/* Build a TransCondAp from the plain transition trans: same key range and cond space, holding a single cond with key 0 that carries trans's actions, priorities and target. The cond is attached, then trans is detached and deleted. Returns the new transition. */ TransCondAp *FsmAp::convertToCondAp( StateAp *from, TransDataAp *trans ) +{ + TransCondAp *newTrans = new TransCondAp(); + newTrans->lowKey = trans->lowKey; + newTrans->highKey = trans->highKey; + newTrans->condSpace = trans->condSpace; + + CondAp *newCond = new CondAp( newTrans ); + newCond->key = 0; + newTrans->condList.append( newCond ); + + newCond->lmActionTable.setActions( trans->lmActionTable ); + newCond->actionTable.setActions( trans->actionTable ); + newCond->priorTable.setPriors( trans->priorTable ); + + attachTrans( from, trans->toState, newCond ); + + /* Detach the old transition from its target and free it. */ + detachTrans( from, trans->toState, trans ); + delete trans; + + return newTrans; +} + +/* Convert every plain out transition of state to its cond form, keeping the order of the out list. */ void FsmAp::convertToCondAp( StateAp *state ) +{ + /* Replace TransDataAp with cond versions, collecting everything in order + * on destList. The next iterator is taken first because the conversion + * deletes the plain transition. */ + TransList destList; + for ( TransList::Iter tr = state->outList; tr.lte(); ) { + TransList::Iter next = tr.next(); + if ( tr->plain() ) { + TransCondAp *newTrans = convertToCondAp( state, tr->tdap() ); + destList.append( newTrans ); + } + else { + destList.append( tr ); + } + + tr = next; + } + + state->outList.abandon(); + state->outList.transfer( destList ); +} + +/* Embed the conditions in set, with allowed key values vals, into every out transition of state. All out transitions are first converted to cond form. Each one is then expanded to the cond space merged from its own conditions and set, and every cond whose key is not allowed by both the transition and vals is detached and deleted. */ void FsmAp::doEmbedCondition( StateAp *state, + const CondSet &set, const CondKeySet &vals ) +{ + convertToCondAp( state ); + + for ( TransList::Iter tr = state->outList; tr.lte(); tr++ ) { + + /* The source (being embedded). */ + CondSpace *srcSpace = addCondSpace( set ); + CondKeySet srcVals = vals; + + /* Extract cond key set from the condition list. We will use this to + * compute the intersection of the cond keys. */ + CondSpace *trSpace = tr->condSpace; + CondKeySet trVals; + if ( tr->condSpace == 0 ) + trVals.append( 0 ); + else { + for ( CondList::Iter cti = tr->tcap()->condList; cti.lte(); cti++ ) { + long key = cti->key.getVal(); + trVals.append( key ); + } + } + + /* Construct merged. 
*/ + CondSet mergedCS; + if ( tr->condSpace != 0 ) + mergedCS.insert( tr->condSpace->condSet ); + mergedCS.insert( set ); + + CondSpace *mergedSpace = addCondSpace( mergedCS ); + + if ( srcSpace != mergedSpace ) { + /* Prep the key list with zero item if necessary. */ + if ( srcSpace == 0 ) + srcVals.append( 0 ); + + expandCondKeys( srcVals, srcSpace, mergedSpace ); + } + + if ( trSpace != mergedSpace ) { + /* Don't need to prep the key list with zero item, will be there + * (see above). */ + expandCondKeys( trVals, trSpace, mergedSpace ); + } + + /* Implement AND, in two parts: keep the keys that appear in both + * srcVals and trVals. Both loops apply the same membership test, from + * opposite sides. */ + CondKeySet newItems; + for ( CondKeySet::Iter c = srcVals; c.lte(); c++ ) { + if ( trVals.find( *c ) ) + newItems.insert( *c ); + } + + for ( CondKeySet::Iter c = trVals; c.lte(); c++ ) { + if ( srcVals.find( *c ) ) + newItems.insert( *c ); + } + + /* Expand the transitions, then we remove anything not in the computed + * list of keys. This approach allows us to embed combinations of + * senses, rather than cond-sense pairs. Necessary for out conditions. */ + CondSpace *orig = tr->condSpace; + tr->condSpace = mergedSpace; + expandConds( state, tr, orig, mergedSpace ); + + /* After expansion, remove anything not in newItems. */ + for ( CondList::Iter cti = tr->tcap()->condList; cti.lte(); ) { + long key = cti->key.getVal(); + + if ( !newItems.find( key ) ) { + /* Delete. The next iterator is taken before the cond is freed. */ + CondList::Iter next = cti.next(); + + CondAp *cond = cti; + detachTrans( state, cond->toState, cond ); + tr->tcap()->condList.detach( cond ); + delete cond; + + cti = next; + } + else { + /* Leave alone. */ + cti++; + } + } + } +} + +/* Embed the conditions in set with allowed values vals into the out transitions of state in fsm. Runs doEmbedCondition with misfit accounting on, then fills in states and removes the misfits. Returns the result of fillInStates. */ FsmRes FsmAp::embedCondition( FsmAp *fsm, StateAp *state, const CondSet &set, const CondKeySet &vals ) +{ + /* Turn on misfit accounting to possibly catch the old start state. */ + fsm->setMisfitAccounting( true ); + + /* Do the embedding. */ + fsm->doEmbedCondition( state, set, vals ); + + /* Fill in any states that were newed up as combinations of others. 
*/ + FsmRes res = fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Remove the misfits and turn off misfit accounting. NOTE(review): the + * early return above skips this, leaving misfit accounting on when + * fillInStates fails -- confirm that is intended. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + return res; +} + +void FsmAp::addOutCondition( StateAp *state, Action *condAction, bool sense ) +{ + CondSet origCS; + if ( state->outCondSpace != 0 ) + origCS.insert( state->outCondSpace->condSet ); + + CondSet mergedCS; + mergedCS.insert( origCS ); + + bool added = mergedCS.insert( condAction ); + if ( !added ) { + + /* Already exists in the cond set. For every transition, if the + * sense is identical to what we are embedding, leave it alone. If + * the sense is opposite, delete it. */ + + /* Find the position. */ + long pos = 0; + for ( CondSet::Iter csi = mergedCS; csi.lte(); csi++ ) { + if ( *csi == condAction ) + pos = csi.pos(); + } + + for ( int cti = 0; cti < state->outCondKeys.length(); ) { + long key = state->outCondKeys[cti]; + + bool set = ( key & ( 1 << pos ) ) != 0; + if ( sense xor set ) { + /* Delete. */ + state->outCondKeys.CondKeyVect::remove( cti, 1 ); + } + else { + /* Leave alone. */ + cti++; + } + } + } + else { + /* Does not exist in the cond set. We will add it. */ + + if ( state->outCondSpace == 0 ) { + /* Note that unlike transitions, we start here with an empty key + * list. Add the item */ + state->outCondKeys.append( 0 ); + } + + /* Allocate a cond space for the merged set. */ + CondSpace *mergedCondSpace = addCondSpace( mergedCS ); + state->outCondSpace = mergedCondSpace; + + /* FIXME: assumes one item always. */ + + /* Translate original condition values, making space for the new bit + * (possibly) introduced by the condition embedding. */ + for ( int cti = 0; cti < state->outCondKeys.length(); cti++ ) { + long origVal = state->outCondKeys[cti]; + long newVal = 0; + + /* For every set bit in the orig, find it's position in the merged + * and set the bit appropriately. 
*/ + for ( CondSet::Iter csi = origCS; csi.lte(); csi++ ) { + /* If set, find it in the merged set and flip the bit to 1. If + * not set, there is nothing to do (convenient eh?) */ + if ( origVal & (1 << csi.pos()) ) { + /* The condition is set. Find the bit position in the + * merged set. */ + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + newVal |= 1 << bitPos; + } + } + + if ( origVal != newVal ) + state->outCondKeys[cti] = newVal; + + /* Now set the new bit appropriately. Since it defaults to zero we + * only take action if sense is positive. */ + if ( sense ) { + Action **cim = mergedCS.find( condAction ); + int pos = cim - mergedCS.data; + state->outCondKeys[cti] = state->outCondKeys[cti] | (1 << pos); + } + } + } +} diff --git a/ragel/fsmgraph.cc b/ragel/fsmgraph.cc new file mode 100644 index 00000000..819bfa96 --- /dev/null +++ b/ragel/fsmgraph.cc @@ -0,0 +1,1948 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <iostream> + +#include "fsmgraph.h" +#include "mergesort.h" +#include "action.h" + +using std::endl; + +Action::~Action() +{ + /* If we were created by substitution of another action then we don't own the inline list. */ + if ( substOf == 0 && inlineList != 0 ) { + inlineList->empty(); + delete inlineList; + inlineList = 0; + } +} + +InlineItem::~InlineItem() +{ + if ( children != 0 ) { + children->empty(); + delete children; + } +} + +/* Make a new state. The new state will be put on the graph's + * list of state. The new state can be created final or non final. */ +StateAp *FsmAp::addState() +{ + /* Make the new state to return. */ + StateAp *state = new StateAp(); + + if ( misfitAccounting ) { + /* Create the new state on the misfit list. All states are created + * with no foreign in transitions. */ + misfitList.append( state ); + } + else { + /* Create the new state. */ + stateList.append( state ); + } + + return state; +} + +/* Construct an FSM that is the concatenation of an array of characters. A new + * machine will be made that has len+1 states with one transition between each + * state for each integer in str. IsSigned determines if the integers are to + * be considered as signed or unsigned ints. */ +FsmAp *FsmAp::concatFsm( FsmCtx *ctx, Key *str, int len ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Make the first state and set it as the start state. */ + StateAp *last = fsm->addState(); + fsm->setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + StateAp *newState = fsm->addState(); + fsm->attachNewTrans( last, newState, str[i], str[i] ); + last = newState; + } + + /* Make the last state the final state. 
*/ + fsm->setFinState( last ); + + return fsm; +} + +/* Case insensitive version of concatFsm. */ +FsmAp *FsmAp::concatFsmCI( FsmCtx *ctx, Key *str, int len ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Make the first state and set it as the start state. */ + StateAp *last = fsm->addState(); + fsm->setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + StateAp *newState = fsm->addState(); + + KeySet keySet( ctx->keyOps ); + if ( str[i].isLower() ) + keySet.insert( str[i].toUpper() ); + if ( str[i].isUpper() ) + keySet.insert( str[i].toLower() ); + keySet.insert( str[i] ); + + for ( int i = 0; i < keySet.length(); i++ ) + fsm->attachNewTrans( last, newState, keySet[i], keySet[i] ); + + last = newState; + } + + /* Make the last state the final state. */ + fsm->setFinState( last ); + + return fsm; +} + + +/* Construct a machine that matches one character. A new machine will be made + * that has two states with a single transition between the states. */ +FsmAp *FsmAp::concatFsm( FsmCtx *ctx, Key chr ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Two states first start, second final. */ + fsm->setStartState( fsm->addState() ); + + StateAp *end = fsm->addState(); + fsm->setFinState( end ); + + /* Attach on the character. */ + fsm->attachNewTrans( fsm->startState, end, chr, chr ); + + return fsm; +} + +/* Case insensitive version of single-char concat FSM. */ +FsmAp *FsmAp::concatFsmCI( FsmCtx *ctx, Key chr ) +{ + return concatFsmCI( ctx, &chr, 1 ); +} + + +/* Construct a machine that matches any character in set. A new machine will + * be made that has two states and len transitions between the them. The set + * should be ordered correctly accroding to KeyOps and should not contain + * any duplicates. */ +FsmAp *FsmAp::orFsm( FsmCtx *ctx, Key *set, int len ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Two states first start, second final. 
*/ + fsm->setStartState( fsm->addState() ); + + StateAp *end = fsm->addState(); + fsm->setFinState( end ); + + for ( int i = 1; i < len; i++ ) + assert( ctx->keyOps->lt( set[i-1], set[i] ) ); + + /* Attach on all the integers in the given string of ints. */ + for ( int i = 0; i < len; i++ ) + fsm->attachNewTrans( fsm->startState, end, set[i], set[i] ); + + return fsm; +} + +FsmAp *FsmAp::dotFsm( FsmCtx *ctx ) +{ + FsmAp *retFsm = FsmAp::rangeFsm( ctx, + ctx->keyOps->minKey, ctx->keyOps->maxKey ); + return retFsm; +} + +FsmAp *FsmAp::dotStarFsm( FsmCtx *ctx ) +{ + FsmAp *retFsm = FsmAp::rangeStarFsm( ctx, + ctx->keyOps->minKey, ctx->keyOps->maxKey ); + return retFsm; +} + +/* Construct a machine that matches a range of characters. A new machine will + * be made with two states and a range transition between them. The range will + * match any characters from low to high inclusive. Low should be less than or + * equal to high otherwise undefined behaviour results. IsSigned determines + * if the integers are to be considered as signed or unsigned ints. */ +FsmAp *FsmAp::rangeFsm( FsmCtx *ctx, Key low, Key high ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Two states first start, second final. */ + fsm->setStartState( fsm->addState() ); + + StateAp *end = fsm->addState(); + fsm->setFinState( end ); + + /* Attach using the range of characters. */ + fsm->attachNewTrans( fsm->startState, end, low, high ); + + return fsm; +} + +FsmAp *FsmAp::notRangeFsm( FsmCtx *ctx, Key low, Key high ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Two states first start, second final. */ + fsm->setStartState( fsm->addState() ); + + StateAp *end = fsm->addState(); + fsm->setFinState( end ); + + /* Attach using the range of characters. 
*/ + if ( ctx->keyOps->lt( ctx->keyOps->minKey, low ) ) { + ctx->keyOps->decrement( low ); + fsm->attachNewTrans( fsm->startState, end, ctx->keyOps->minKey, low ); + } + + if ( ctx->keyOps->lt( high, ctx->keyOps->maxKey ) ) { + ctx->keyOps->increment( high ); + fsm->attachNewTrans( fsm->startState, end, high, ctx->keyOps->maxKey ); + } + + return fsm; +} + + +FsmAp *FsmAp::rangeFsmCI( FsmCtx *ctx, Key lowKey, Key highKey ) +{ + FsmAp *retFsm = rangeFsm( ctx, lowKey, highKey ); + + /* Union the portion that covers alphas. */ + if ( lowKey.getVal() <= 'z' ) { + int low, high; + if ( lowKey.getVal() <= 'a' ) + low = 'a'; + else + low = lowKey.getVal(); + + if ( highKey.getVal() >= 'a' ) { + if ( highKey.getVal() >= 'z' ) + high = 'z'; + else + high = highKey.getVal(); + + /* Add in upper(low) .. upper(high) */ + + FsmAp *addFsm = FsmAp::rangeFsm( ctx, toupper(low), toupper(high) ); + FsmRes res = FsmAp::unionOp( retFsm, addFsm ); + retFsm = res.fsm; + } + } + + if ( lowKey.getVal() <= 'Z' ) { + int low, high; + if ( lowKey.getVal() <= 'A' ) + low = 'A'; + else + low = lowKey.getVal(); + + if ( highKey.getVal() >= 'A' ) { + if ( highKey.getVal() >= 'Z' ) + high = 'Z'; + else + high = highKey.getVal(); + + /* Add in lower(low) .. lower(high) */ + FsmAp *addFsm = FsmAp::rangeFsm( ctx, tolower(low), tolower(high) ); + FsmRes res = FsmAp::unionOp( retFsm, addFsm ); + retFsm = res.fsm; + } + } + + return retFsm; +} + +/* Construct a machine that a repeated range of characters. */ +FsmAp *FsmAp::rangeStarFsm( FsmCtx *ctx, Key low, Key high ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* One state which is final and is the start state. */ + fsm->setStartState( fsm->addState() ); + fsm->setFinState( fsm->startState ); + + /* Attach start to start using range of characters. */ + fsm->attachNewTrans( fsm->startState, fsm->startState, low, high ); + + return fsm; +} + +/* Construct a machine that matches the empty string. A new machine will be + * made with only one state. 
The new state will be both a start and final + * state. IsSigned determines if the machine has a signed or unsigned + * alphabet. Fsm operations must be done on machines with the same alphabet + * signedness. */ +FsmAp *FsmAp::lambdaFsm( FsmCtx *ctx ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Give it one state with no transitions making it + * the start state and final state. */ + fsm->setStartState( fsm->addState() ); + fsm->setFinState( fsm->startState ); + + return fsm; +} + +/* Construct a machine that matches nothing at all. A new machine will be + * made with only one state. It will not be final. */ +FsmAp *FsmAp::emptyFsm( FsmCtx *ctx ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Give it one state with no transitions making it + * the start state and final state. */ + fsm->setStartState( fsm->addState() ); + + return fsm; +} + +void FsmAp::transferOutData( StateAp *destState, StateAp *srcState ) +{ + for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) { + /* Get the actions data from the outActionTable. */ + trans->tdap()->actionTable.setActions( srcState->outActionTable ); + + /* Get the priorities from the outPriorTable. */ + trans->tdap()->priorTable.setPriors( srcState->outPriorTable ); + } + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) { + /* Get the actions data from the outActionTable. */ + cond->actionTable.setActions( srcState->outActionTable ); + + /* Get the priorities from the outPriorTable. */ + cond->priorTable.setPriors( srcState->outPriorTable ); + } + } + } + } + + if ( destState->nfaOut != 0 ) { + for ( NfaTransList::Iter na = *destState->nfaOut; na.lte(); na++ ) + transferOutToNfaTrans( na, srcState ); + } +} + +/* Union worker used by union, set diff (subtract) and intersection. 
*/ +FsmRes FsmAp::doUnion( FsmAp *fsm, FsmAp *other ) +{ + /* Build a state set consisting of both start states */ + StateSet startStateSet; + startStateSet.insert( fsm->startState ); + startStateSet.insert( other->startState ); + + /* Both of the original start states loose their start state status. */ + fsm->unsetStartState(); + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + fsm->copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other + * into this. No states will be deleted. */ + fsm->stateList.append( other->stateList ); + fsm->misfitList.append( other->misfitList ); + + /* Move the final set data from other into this. */ + fsm->finStateSet.insert(other->finStateSet); + other->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Create a new start state. */ + fsm->setStartState( fsm->addState() ); + + /* Merge the start states. */ + fsm->mergeStateList( fsm->startState, startStateSet.data, startStateSet.length() ); + + /* Fill in any new states made from merging. */ + return fillInStates( fsm ); +} + +bool FsmAp::inEptVect( EptVect *eptVect, StateAp *state ) +{ + if ( eptVect != 0 ) { + /* Vect is there, walk it looking for state. */ + for ( int i = 0; i < eptVect->length(); i++ ) { + if ( eptVect->data[i].targ == state ) + return true; + } + } + return false; +} + +/* Fill epsilon vectors in a root state from a given starting point. Epmploys + * a depth first search through the graph of epsilon transitions. */ +void FsmAp::epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving ) +{ + /* Walk the epsilon transitions out of the state. */ + for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) { + /* Find the entry point, if the it does not resove, ignore it. 
*/ + EntryMapEl *enLow, *enHigh; + if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) { + /* Loop the targets. */ + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) { + /* Do not add the root or states already in eptVect. */ + StateAp *targ = en->value; + if ( targ != from && !inEptVect(root->eptVect, targ) ) { + /* Maybe need to create the eptVect. */ + if ( root->eptVect == 0 ) + root->eptVect = new EptVect(); + + /* If moving to a different graph or if any parent is + * leaving then we are leaving. */ + bool leaving = parentLeaving || + root->owningGraph != targ->owningGraph; + + /* All ok, add the target epsilon and recurse. */ + root->eptVect->append( EptVectEl(targ, leaving) ); + epsilonFillEptVectFrom( root, targ, leaving ); + } + } + } + } +} + +void FsmAp::shadowReadWriteStates() +{ + /* Init isolatedShadow algorithm data. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->isolatedShadow = 0; + + /* Any states that may be both read from and written to must + * be shadowed. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Find such states by looping through stateVect lists, which give us + * the states that will be read from. May cause us to visit the states + * that we are interested in more than once. */ + if ( st->eptVect != 0 ) { + /* For all states that will be read from. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + /* Check for read and write to the same state. */ + StateAp *targ = ept->targ; + if ( targ->eptVect != 0 ) { + /* State is to be written to, if the shadow is not already + * there, create it. */ + if ( targ->isolatedShadow == 0 ) { + StateAp *shadow = addState(); + mergeStates( shadow, targ ); + targ->isolatedShadow = shadow; + } + + /* Write shadow into the state vector so that it is the + * state that the epsilon transition will read from. 
*/ + ept->targ = targ->isolatedShadow; + } + } + } + } +} + +void FsmAp::resolveEpsilonTrans() +{ + /* Walk the state list and invoke recursive worker on each state. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + epsilonFillEptVectFrom( st, st, false ); + + /* Prevent reading from and writing to of the same state. */ + shadowReadWriteStates(); + + /* For all states that have epsilon transitions out, draw the transitions, + * clear the epsilon transitions. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* If there is a state vector, then create the pre-merge state. */ + if ( st->eptVect != 0 ) { + /* Merge all the epsilon targets into the state. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + if ( ept->leaving ) + mergeStatesLeaving( st, ept->targ ); + else + mergeStates( st, ept->targ ); + } + + /* Clean up the target list. */ + delete st->eptVect; + st->eptVect = 0; + } + + /* Clear the epsilon transitions vector. */ + st->epsilonTrans.empty(); + } +} + +FsmRes FsmAp::applyNfaTrans( FsmAp *fsm, StateAp *fromState, StateAp *toState, NfaTrans *nfaTrans ) +{ + fsm->setMisfitAccounting( true ); + + fsm->mergeStates( fromState, toState, false ); + + /* Epsilons can caused merges which leave behind unreachable states. */ + FsmRes res = FsmAp::fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Can nuke the epsilon transition that we will never + * follow. */ + fsm->detachFromNfa( fromState, toState, nfaTrans ); + fromState->nfaOut->detach( nfaTrans ); + delete nfaTrans; + + if ( fromState->nfaOut->length() == 0 ) { + delete fromState->nfaOut; + fromState->nfaOut = 0; + } + + /* Remove the misfits and turn off misfit accounting. 
*/ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + return FsmRes( FsmRes::Fsm(), fsm ); +} + +void FsmAp::globOp( FsmAp **others, int numOthers ) +{ + for ( int m = 0; m < numOthers; m++ ) { + assert( ctx == others[m]->ctx ); + } + + /* All other machines loose start states status. */ + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. */ + stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } +} + +/* Used near the end of an fsm construction. Any labels that are still around + * are referenced only by gotos and calls and they need to be made into + * deterministic entry points. */ +void FsmAp::deterministicEntry() +{ + /* States may loose their entry points, turn on misfit accounting. */ + setMisfitAccounting( true ); + + /* Get a copy of the entry map then clear all the entry points. As we + * iterate the old entry map finding duplicates we will add the entry + * points for the new states that we create. */ + EntryMap prevEntry = entryPoints; + unsetAllEntryPoints(); + + for ( int enId = 0; enId < prevEntry.length(); ) { + /* Count the number of states on this entry key. */ + int highId = enId; + while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key ) + highId += 1; + + int numIds = highId - enId; + if ( numIds == 1 ) { + /* Only a single entry point, just set the entry. 
*/ + setEntry( prevEntry[enId].key, prevEntry[enId].value ); + } + else { + /* Multiple entry points, need to create a new state and merge in + * all the targets of entry points. */ + StateAp *newEntry = addState(); + for ( int en = enId; en < highId; en++ ) + mergeStates( newEntry, prevEntry[en].value ); + + /* Add the new state as the single entry point. */ + setEntry( prevEntry[enId].key, newEntry ); + } + + enId += numIds; + } + + /* The old start state may be unreachable. Remove the misfits and turn off + * misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Unset any final states that are no longer to be final due to final bits. */ +void FsmAp::unsetKilledFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for killing bit. */ + StateAp *state = fin.data[s]; + if ( state->stateBits & STB_GRAPH1 ) { + /* One final state is a killer, set to non-final. */ + unsetFinState( state ); + } + + /* Clear all killing bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~STB_GRAPH1; + } +} + +/* Unset any final states that are no longer to be final due to final bits. */ +void FsmAp::unsetIncompleteFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for one set but not the other. */ + StateAp *state = fin.data[s]; + if ( state->stateBits & STB_BOTH && + (state->stateBits & STB_BOTH) != STB_BOTH ) + { + /* One state wants the other but it is not there. */ + unsetFinState( state ); + } + + /* Clear wanting bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~STB_BOTH; + } +} + +/* Kleene star operator. Makes this machine the kleene star of itself. 
Any + * transitions made going out of the machine and back into itself will be + * notified that they are leaving transitions by having the leavingFromState + * callback invoked. */ +FsmRes FsmAp::starOp( FsmAp *fsm ) +{ + /* The start func orders need to be shifted before doing the star. */ + fsm->ctx->curActionOrd += fsm->shiftStartActionOrder( fsm->ctx->curActionOrd ); + + /* Turn on misfit accounting to possibly catch the old start state. */ + fsm->setMisfitAccounting( true ); + + /* Create the new new start state. It will be set final after the merging + * of the final states with the start state is complete. */ + StateAp *prevStartState = fsm->startState; + fsm->unsetStartState(); + fsm->setStartState( fsm->addState() ); + + /* Merge the new start state with the old one to isolate it. */ + fsm->mergeStates( fsm->startState, prevStartState ); + + if ( !fsm->startState->isFinState() ) { + /* Common case, safe to merge. */ + for ( StateSet::Iter st = fsm->finStateSet; st.lte(); st++ ) + fsm->mergeStatesLeaving( *st, fsm->startState ); + } + else { + /* Merge the start state into all final states. Except the start state on + * the first pass. If the start state is set final we will be doubling up + * its transitions, which will get transfered to any final states that + * follow it in the final state set. This will be determined by the order + * of items in the final state set. To prevent this we just merge with the + * start on a second pass. */ + StateSet origFin = fsm->finStateSet; + for ( StateSet::Iter st = origFin; st.lte(); st++ ) { + if ( *st != fsm->startState ) + fsm->mergeStatesLeaving( *st, fsm->startState ); + } + + /* Now it is safe to merge the start state with itself (provided it + * is set final). */ + if ( fsm->startState->isFinState() ) + fsm->mergeStatesLeaving( fsm->startState, fsm->startState ); + } + + /* Now ensure the new start state is a final state. 
*/ + fsm->setFinState( fsm->startState ); + + /* Fill in any states that were newed up as combinations of others. */ + FsmRes res = FsmAp::fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + fsm->afterOpMinimize(); + + return res; +} + +FsmRes FsmAp::plusOp( FsmAp *fsm ) +{ + /* Need a duplicate for the star end. */ + FsmAp *factorDup = new FsmAp( *fsm ); + + /* Star the duplicate. */ + FsmRes res1 = FsmAp::starOp( factorDup ); + if ( !res1.success() ) + return res1; + + FsmRes res2 = FsmAp::concatOp( fsm, res1.fsm ); + if ( !res2.success() ) + return res2; + + return res2; +} + +FsmRes FsmAp::questionOp( FsmAp *fsm ) +{ + /* Make the null fsm. */ + FsmAp *nu = FsmAp::lambdaFsm( fsm->ctx ); + + /* Perform the question operator. */ + FsmRes res = FsmAp::unionOp( fsm, nu ); + if ( !res.success() ) + return res; + + return res; +} + +FsmRes FsmAp::exactRepeatOp( FsmAp *fsm, int times ) +{ + /* Zero repetitions produces lambda machine. */ + if ( times == 0 ) { + FsmCtx *fsmCtx = fsm->ctx; + delete fsm; + return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( fsmCtx ) ); + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + fsm->ctx->curActionOrd += fsm->shiftStartActionOrder( fsm->ctx->curActionOrd ); + + /* A repeat of one does absolutely nothing. */ + if ( times == 1 ) + return FsmRes( FsmRes::Fsm(), fsm ); + + /* Make a machine to make copies from. */ + FsmAp *copyFrom = new FsmAp( *fsm ); + + /* Concatentate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + FsmAp *dup = new FsmAp( *copyFrom ); + FsmRes res = concatOp( fsm, dup ); + if ( !res.success() ) { + delete copyFrom; + return res; + } + } + + /* Now use the copyFrom on the end. 
*/ + FsmRes res = concatOp( fsm, copyFrom ); + if ( !res.success()) + return res; + + res.fsm->afterOpMinimize(); + + return res; +} + +FsmRes FsmAp::maxRepeatOp( FsmAp *fsm, int times ) +{ + /* Zero repetitions produces lambda machine. */ + if ( times == 0 ) { + FsmCtx *fsmCtx = fsm->ctx; + delete fsm; + return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( fsmCtx ) ); + } + + fsm->ctx->curActionOrd += fsm->shiftStartActionOrder( fsm->ctx->curActionOrd ); + + /* A repeat of one optional merely allows zero string. */ + if ( times == 1 ) { + isolateStartState( fsm ); + fsm->setFinState( fsm->startState ); + return FsmRes( FsmRes::Fsm(), fsm ); + } + + /* Make a machine to make copies from. */ + FsmAp *copyFrom = new FsmAp( *fsm ); + + /* The state set used in the from end of the concatentation. Starts with + * the initial final state set, then after each concatenation, gets set to + * the the final states that come from the the duplicate. */ + StateSet lastFinSet( fsm->finStateSet ); + + /* Set the initial state to zero to allow zero copies. */ + isolateStartState( fsm ); + fsm->setFinState( fsm->startState ); + + /* Concatentate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + /* Make a duplicate for concating and set the fin bits to graph 2 so we + * can pick out it's final states after the optional style concat. */ + FsmAp *dup = new FsmAp( *copyFrom ); + dup->setFinBits( STB_GRAPH2 ); + FsmRes res = concatOp( fsm, dup, false, &lastFinSet, true ); + if ( !res.success() ) { + delete copyFrom; + return res; + } + + /* Clear the last final state set and make the new one by taking only + * the final states that come from graph 2.*/ + lastFinSet.empty(); + for ( int i = 0; i < fsm->finStateSet.length(); i++ ) { + /* If the state came from graph 2, add it to the last set and clear + * the bits. 
*/ + StateAp *fs = fsm->finStateSet[i]; + if ( fs->stateBits & STB_GRAPH2 ) { + lastFinSet.insert( fs ); + fs->stateBits &= ~STB_GRAPH2; + } + } + } + + /* Now use the copyFrom on the end, no bits set, no bits to clear. */ + FsmRes res = concatOp( fsm, copyFrom, false, &lastFinSet, true ); + if ( !res.success() ) + return res; + + res.fsm->afterOpMinimize(); + + return res; +} + +FsmRes FsmAp::minRepeatOp( FsmAp *fsm, int times ) +{ + if ( times == 0 ) { + /* Acts just like a star op on the machine to return. */ + return FsmAp::starOp( fsm ); + } + else { + /* Take a duplicate for the star below. */ + FsmAp *dup = new FsmAp( *fsm ); + + /* Do repetition on the first half. */ + FsmRes exact = FsmAp::exactRepeatOp( fsm, times ); + if ( !exact.success() ) { + delete dup; + return exact; + } + + /* Star the duplicate. */ + FsmRes star = FsmAp::starOp( dup ); + if ( !star.success() ) { + delete exact.fsm; + return star; + } + + /* Tack on the kleene star. */ + return FsmAp::concatOp( exact.fsm, star.fsm ); + } +} + +FsmRes FsmAp::rangeRepeatOp( FsmAp *fsm, int lowerRep, int upperRep ) +{ + if ( lowerRep == 0 && upperRep == 0 ) { + FsmCtx *fsmCtx = fsm->ctx; + delete fsm; + return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( fsmCtx ) ); + } + else if ( lowerRep == 0 ) { + /* Just doing max repetition. Already guarded against n == 0. */ + return FsmAp::maxRepeatOp( fsm, upperRep ); + } + else if ( lowerRep == upperRep ) { + /* Just doing exact repetition. Already guarded against n == 0. */ + return FsmAp::exactRepeatOp( fsm, lowerRep ); + } + else { + /* This is the case that 0 < lowerRep < upperRep. Take a + * duplicate for the optional repeat. */ + FsmAp *dup = new FsmAp( *fsm ); + + /* Do repetition on the first half. */ + FsmRes exact = FsmAp::exactRepeatOp( fsm, lowerRep ); + if ( !exact.success() ) { + delete dup; + return exact; + } + + /* Do optional repetition on the second half. 
*/ + FsmRes optional = FsmAp::maxRepeatOp( dup, upperRep - lowerRep ); + if ( !optional.success() ) { + delete exact.fsm; + return optional; + } + + /* Concat two halves. */ + return FsmAp::concatOp( exact.fsm, optional.fsm ); + } +} + +/* Concatenates other to the end of this machine. Other is deleted. Any + * transitions made leaving this machine and entering into other are notified + * that they are leaving transitions by having the leavingFromState callback + * invoked. Supports specifying the fromStates (istead of first final state + * set). This is useful for a max-repeat schenario, where from states are not + * all of first's final states. Also supports treating the concatentation as + * optional, which leaves the final states of the first machine as final. */ +FsmRes FsmAp::concatOp( FsmAp *fsm, FsmAp *other, bool lastInSeq, StateSet *fromStates, bool optional ) +{ + for ( PriorTable::Iter g = other->startState->guardedInTable; g.lte(); g++ ) { + fsm->allTransPrior( 0, g->desc ); + other->allTransPrior( 0, g->desc->other ); + } + + /* Assert same signedness and return graph concatenation op. */ + assert( fsm->ctx == other->ctx ); + + /* For the merging process. */ + StateSet finStateSetCopy, startStateSet; + + /* Turn on misfit accounting for both graphs. */ + fsm->setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Get the other's start state. */ + StateAp *otherStartState = other->startState; + + /* Unset other's start state before bringing in the entry points. */ + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + fsm->copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Bring in other's states into our state lists. */ + fsm->stateList.append( other->stateList ); + fsm->misfitList.append( other->misfitList ); + + /* If from states is not set, then get a copy of our final state set before + * we clobber it and use it instead. 
*/ + if ( fromStates == 0 ) { + finStateSetCopy = fsm->finStateSet; + fromStates = &finStateSetCopy; + } + + /* Unset all of our final states and get the final states from other. */ + if ( !optional ) + fsm->unsetAllFinStates(); + fsm->finStateSet.insert( other->finStateSet ); + + /* Since other's lists are empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Merge our former final states with the start state of other. */ + for ( int i = 0; i < fromStates->length(); i++ ) { + StateAp *state = fromStates->data[i]; + + /* Merge the former final state with other's start state. */ + fsm->mergeStatesLeaving( state, otherStartState ); + + /* If the former final state was not reset final then we must clear + * the state's out trans data. If it got reset final then it gets to + * keep its out trans data. This must be done before fillInStates gets + * called to prevent the data from being sourced. */ + if ( ! state->isFinState() ) + fsm->clearOutData( state ); + } + + /* Fill in any new states made from merging. */ + FsmRes res = fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + res.fsm->afterOpMinimize( lastInSeq ); + + return res; +} + +FsmRes FsmAp::rightStartConcatOp( FsmAp *fsm, FsmAp *other, bool lastInSeq ) +{ + PriorDesc *priorDesc0 = fsm->ctx->allocPriorDesc(); + PriorDesc *priorDesc1 = fsm->ctx->allocPriorDesc(); + + /* Set up the priority descriptors. The left machine gets the + * lower priority where as the right get the higher start priority. */ + priorDesc0->key = fsm->ctx->nextPriorKey++; + priorDesc0->priority = 0; + fsm->allTransPrior( fsm->ctx->curPriorOrd++, priorDesc0 ); + + /* The start transitions of the right machine gets the higher + * priority. Use the same unique key. 
*/ + priorDesc1->key = priorDesc0->key; + priorDesc1->priority = 1; + other->startFsmPrior( fsm->ctx->curPriorOrd++, priorDesc1 ); + + return concatOp( fsm, other, lastInSeq ); +} + +/* Returns union of fsm and other. Other is deleted. */ +FsmRes FsmAp::unionOp( FsmAp *fsm, FsmAp *other, bool lastInSeq ) +{ + assert( fsm->ctx == other->ctx ); + + fsm->ctx->unionOp = true; + + fsm->setFinBits( STB_GRAPH1 ); + other->setFinBits( STB_GRAPH2 ); + + /* Turn on misfit accounting for both graphs. */ + fsm->setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Call Worker routine. */ + FsmRes res = doUnion( fsm, other ); + if ( !res.success() ) + return res; + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + fsm->ctx->unionOp = false; + fsm->unsetFinBits( STB_BOTH ); + + fsm->afterOpMinimize( lastInSeq ); + + return res; +} + +/* Intersects other with this machine. Other is deleted. */ +FsmRes FsmAp::intersectOp( FsmAp *fsm, FsmAp *other, bool lastInSeq ) +{ + assert( fsm->ctx == other->ctx ); + + /* Turn on misfit accounting for both graphs. */ + fsm->setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits on this and other to want each other. */ + fsm->setFinBits( STB_GRAPH1 ); + other->setFinBits( STB_GRAPH2 ); + + /* Call worker Or routine. */ + FsmRes res = doUnion( fsm, other ); + if ( !res.success() ) + return res; + + /* Unset any final states that are no longer to + * be final due to final bits. */ + fsm->unsetIncompleteFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + fsm->removeDeadEndStates(); + + fsm->afterOpMinimize( lastInSeq ); + + return res; +} + +/* Set subtracts other machine from this machine. Other is deleted. 
*/ +FsmRes FsmAp::subtractOp( FsmAp *fsm, FsmAp *other, bool lastInSeq ) +{ + assert( fsm->ctx == other->ctx ); + + /* Turn on misfit accounting for both graphs. */ + fsm->setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits of other to be killers. */ + other->setFinBits( STB_GRAPH1 ); + + /* Call worker Or routine. */ + FsmRes res = doUnion( fsm, other ); + if ( !res.success() ) + return res; + + /* Unset any final states that are no longer to + * be final due to final bits. */ + fsm->unsetKilledFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + fsm->removeDeadEndStates(); + + fsm->afterOpMinimize( lastInSeq ); + + return res; +} + +FsmRes FsmAp::epsilonOp( FsmAp *fsm ) +{ + fsm->setMisfitAccounting( true ); + + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) + st->owningGraph = 0; + + /* Perform merges. */ + fsm->resolveEpsilonTrans(); + + /* Epsilons can caused merges which leave behind unreachable states. */ + FsmRes res = FsmAp::fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + return res; +} + +/* Make a new maching by joining together a bunch of machines without making + * any transitions between them. A negative finalId results in there being no + * final id. */ +FsmRes FsmAp::joinOp( FsmAp *fsm, int startId, int finalId, FsmAp **others, int numOthers ) +{ + for ( int m = 0; m < numOthers; m++ ) { + assert( fsm->ctx == others[m]->ctx ); + } + + /* Set the owning machines. Start at one. Zero is reserved for the start + * and final states. 
*/ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) + st->owningGraph = 1; + for ( int m = 0; m < numOthers; m++ ) { + for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ ) + st->owningGraph = 2+m; + } + + /* All machines loose start state status. */ + fsm->unsetStartState(); + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + fsm->copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. */ + fsm->stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + fsm->finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } + + /* Look up the start entry point. */ + EntryMapEl *enLow = 0, *enHigh = 0; + bool findRes = fsm->entryPoints.findMulti( startId, enLow, enHigh ); + if ( ! findRes ) { + /* No start state. Set a default one and proceed with the join. Note + * that the result of the join will be a very uninteresting machine. */ + fsm->setStartState( fsm->addState() ); + } + else { + /* There is at least one start state, create a state that will become + * the new start state. */ + StateAp *newStart = fsm->addState(); + fsm->setStartState( newStart ); + + /* The start state is in an owning machine class all it's own. */ + newStart->owningGraph = 0; + + /* Create the set of states to merge from. */ + StateSet stateSet; + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) + stateSet.insert( en->value ); + + /* Merge in the set of start states into the new start state. 
*/ + fsm->mergeStateList( newStart, stateSet.data, stateSet.length() ); + } + + /* Take a copy of the final state set, before unsetting them all. This + * will allow us to call clearOutData on the states that don't get + * final state status back back. */ + StateSet finStateSetCopy = fsm->finStateSet; + + /* Now all final states are unset. */ + fsm->unsetAllFinStates(); + + if ( finalId >= 0 ) { + /* Create the implicit final state. */ + StateAp *finState = fsm->addState(); + fsm->setFinState( finState ); + + /* Assign an entry into the final state on the final state entry id. Note + * that there may already be an entry on this id. That's ok. Also set the + * final state owning machine id. It's in a class all it's own. */ + fsm->setEntry( finalId, finState ); + finState->owningGraph = 0; + } + + /* Hand over to workers for resolving epsilon trans. This will merge states + * with the targets of their epsilon transitions. */ + fsm->resolveEpsilonTrans(); + + /* Invoke the relinquish final callback on any states that did not get + * final state status back. */ + for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) { + if ( !((*st)->stateBits & STB_ISFINAL) ) + fsm->clearOutData( *st ); + } + + /* Fill in any new states made from merging. */ + FsmRes res = FsmAp::fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Joining can be messy. Instead of having misfit accounting on (which is + * tricky here) do a full cleaning. */ + fsm->removeUnreachableStates(); + + return res; +} + +/* Ensure that the start state is free of entry points (aside from the fact + * that it is the start state). If the start state has entry points then Make a + * new start state by merging with the old one. Useful before modifying start + * transitions. If the existing start state has any entry points other than the + * start state entry then modifying its transitions changes more than the start + * transitions. 
So isolate the start state by separating it out such that it + * only has start stateness as it's entry point. */ +FsmRes FsmAp::isolateStartState( FsmAp *fsm ) +{ + /* Do nothing if the start state is already isolated. */ + if ( fsm->isStartStateIsolated() ) + return FsmRes( FsmRes::Fsm(), fsm ); + + /* Turn on misfit accounting to possibly catch the old start state. */ + fsm->setMisfitAccounting( true ); + + /* This will be the new start state. The existing start + * state is merged with it. */ + StateAp *prevStartState = fsm->startState; + fsm->unsetStartState(); + fsm->setStartState( fsm->addState() ); + + /* Merge the new start state with the old one to isolate it. */ + fsm->mergeStates( fsm->startState, prevStartState ); + + /* Stfil and stateDict will be empty because the merging of the old start + * state into the new one will not have any conflicting transitions. */ + assert( fsm->stateDict.treeSize == 0 ); + assert( fsm->nfaList.length() == 0 ); + + /* The old start state may be unreachable. Remove the misfits and turn off + * misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + return FsmRes( FsmRes::Fsm(), fsm ); +} + +StateAp *FsmAp::dupStartState() +{ + StateAp *dup = addState(); + mergeStates( dup, startState ); + return dup; +} + +/* A state merge which represents the drawing in of leaving transitions. If + * there is any out data then we duplicate the source state, transfer the out + * data, then merge in the state. The new state will be reaped because it will + * not be given any in transitions. */ +void FsmAp::mergeStatesLeaving( StateAp *destState, StateAp *srcState ) +{ + if ( !hasOutData( destState ) ) { + /* Perform the merge, indicating we are leaving, which will affect how + * out conds are merged. */ + mergeStates( destState, srcState, true ); + } + else { + /* Dup the source state. 
*/ + StateAp *ssMutable = addState(); + mergeStates( ssMutable, srcState ); + + /* Do out data transfer (and out condition embedding). */ + transferOutData( ssMutable, destState ); + + if ( destState->outCondSpace != 0 ) { + + doEmbedCondition( ssMutable, destState->outCondSpace->condSet, + destState->outCondKeys ); + } + + /* Now we merge with dest, setting leaving = true. This dictates how + * out conditions should be merged. */ + mergeStates( destState, ssMutable, true ); + } +} + +void FsmAp::checkEpsilonRegularInteraction( const PriorTable &t1, const PriorTable &t2 ) +{ + for ( PriorTable::Iter pd1 = t1; pd1.lte(); pd1++ ) { + for ( PriorTable::Iter pd2 = t2; pd2.lte(); pd2++ ) { + /* Looking for unequal guarded priorities with the same key. */ + if ( pd1->desc->key == pd2->desc->key ) { + if ( pd1->desc->priority < pd2->desc->priority || + pd1->desc->priority > pd2->desc->priority ) + { + if ( ctx->checkPriorInteraction && pd1->desc->guarded ) { + if ( ! priorInteraction ) { + priorInteraction = true; + guardId = pd1->desc->guardId; + } + } + } + } + } + } +} + +void FsmAp::mergeStateProperties( StateAp *destState, StateAp *srcState ) +{ + /* Draw in any properties of srcState into destState. */ + if ( srcState == destState ) { + /* Duplicate the list to protect against write to source. The + * priorities sets are not copied in because that would have no + * effect. */ + destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) ); + + /* Get all actions, duplicating to protect against write to source. 
*/ + destState->toStateActionTable.setActions( + ActionTable( srcState->toStateActionTable ) ); + destState->fromStateActionTable.setActions( + ActionTable( srcState->fromStateActionTable ) ); + destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) ); + destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) ); + destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) ); + + /* Not touching guarded-in table or out conditions. Probably should + * leave some of the above alone as well. */ + } + else { + /* Get the epsilons, out priorities. */ + destState->epsilonTrans.append( srcState->epsilonTrans ); + destState->outPriorTable.setPriors( srcState->outPriorTable ); + + /* Get all actions. */ + destState->toStateActionTable.setActions( srcState->toStateActionTable ); + destState->fromStateActionTable.setActions( srcState->fromStateActionTable ); + destState->outActionTable.setActions( srcState->outActionTable ); + destState->errActionTable.setActions( srcState->errActionTable ); + destState->eofActionTable.setActions( srcState->eofActionTable ); + destState->lmNfaParts.insert( srcState->lmNfaParts ); + destState->guardedInTable.setPriors( srcState->guardedInTable ); + } +} + +void FsmAp::mergeStateBits( StateAp *destState, StateAp *srcState ) +{ + /* Get bits and final state status. Note in the above code we depend on the + * original final state status being present. */ + destState->stateBits |= ( srcState->stateBits & ~STB_ISFINAL ); + if ( srcState->isFinState() ) + setFinState( destState ); +} + +void FsmAp::mergeNfaTransitions( StateAp *destState, StateAp *srcState ) +{ + /* Copy in any NFA transitions. 
*/ + if ( srcState->nfaOut != 0 ) { + if ( destState->nfaOut == 0 ) + destState->nfaOut = new NfaTransList; + + for ( NfaTransList::Iter nt = *srcState->nfaOut; nt.lte(); nt++ ) { + NfaTrans *trans = new NfaTrans( + nt->pushTable, nt->restoreTable, + nt->popFrom, nt->popCondSpace, nt->popCondKeys, + nt->popAction, nt->popTest, nt->order ); + + destState->nfaOut->append( trans ); + attachToNfa( destState, nt->toState, trans ); + } + } +} + +void FsmAp::checkPriorInteractions( StateAp *destState, StateAp *srcState ) +{ + /* Run a check on priority interactions between epsilon transitions and + * regular transitions. This can't be used to affect machine construction, + * only to check for priority guards. */ + if ( destState->nfaOut != 0 ) { + for ( NfaTransList::Iter nt = *destState->nfaOut; nt.lte(); nt++ ) { + for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + checkEpsilonRegularInteraction( + trans->tdap()->priorTable, nt->priorTable ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; + cond.lte(); cond++ ) + { + checkEpsilonRegularInteraction( + cond->priorTable, nt->priorTable ); + + } + } + } + } + } +} + +void FsmAp::mergeStates( StateAp *destState, StateAp *srcState, bool leaving ) +{ + /* Transitions. */ + outTransCopy( destState, srcState->outList.head ); + + /* Properties such as out data, to/from actions. */ + mergeStateProperties( destState, srcState ); + + /* Merge out conditions, depends on the operation (leaving or not). */ + mergeOutConds( destState, srcState, leaving ); + + /* State bits, including final state stats. Out conds depnds on this + * happening after. */ + mergeStateBits( destState, srcState ); + + /* Draw in the NFA transitions. */ + mergeNfaTransitions( destState, srcState ); + + /* Hacked in check for priority interactions, allowing detection of some + * bad situations. 
*/ + checkPriorInteractions( destState, srcState ); +} + +void FsmAp::mergeStateList( StateAp *destState, + StateAp **srcStates, int numSrc ) +{ + for ( int s = 0; s < numSrc; s++ ) + mergeStates( destState, srcStates[s] ); +} + +void FsmAp::cleanAbortedFill( StateAp *state ) +{ + /* Iterate the out transitions, deleting them. */ + for ( TransList::Iter n, t = state->outList; t.lte(); ) { + n = t.next(); + if ( t->plain() ) + delete t->tdap(); + else + delete t->tcap(); + t = n; + } + + state->outList.abandon(); + + if ( state->nfaIn != 0 ) { + delete state->nfaIn; + state->nfaIn = 0; + } + + if ( state->nfaOut != 0 ) { + state->nfaOut->empty(); + delete state->nfaOut; + state->nfaOut = 0; + } +} + +void FsmAp::cleanAbortedFill() +{ + while ( nfaList.length() > 0 ) { + StateAp *state = nfaList.head; + + StateSet *stateSet = &state->stateDictEl->stateSet; + //mergeStateList( state, stateSet->data, stateSet->length() ); + + for ( StateSet::Iter s = *stateSet; s.lte(); s++ ) + detachStateDict( state, *s ); + + nfaList.detach( state ); + } + + /* Disassociated state dict elements from states. */ + for ( StateDict::Iter sdi = stateDict; sdi.lte(); sdi++ ) + sdi->targState->stateDictEl = 0; + + /* Delete all the state dict elements. */ + stateDict.empty(); + + /* Delete all the transitions. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + cleanAbortedFill( state ); + + /* Delete all the states. */ + stateList.empty(); + + /* Delete all the transitions. */ + for ( StateList::Iter state = misfitList; state.lte(); state++ ) + cleanAbortedFill( state ); + + /* Delete all the states. 
*/ + misfitList.empty(); +} + +bool FsmAp::overStateLimit() +{ + if ( ctx->stateLimit > FsmCtx::STATE_UNLIMITED ) { + long states = misfitList.length() + stateList.length(); + if ( states > ctx->stateLimit ) + return true; + } + return false; +} + +bool FsmAp::fillAbort( FsmRes &res, FsmAp *fsm ) +{ + if ( fsm->priorInteraction ) { + fsm->cleanAbortedFill(); + int guardId = fsm->guardId; + delete fsm; + res = FsmRes( FsmRes::PriorInteraction(), guardId ); + return true; + } + + if ( fsm->overStateLimit() ) { + fsm->cleanAbortedFill(); + delete fsm; + res = FsmRes( FsmRes::TooManyStates() ); + return true; + } + + return false; +} + +FsmRes FsmAp::fillInStates( FsmAp *fsm ) +{ + /* Used as return value on success. Filled in with error on abort. */ + FsmRes res( FsmRes::Fsm(), fsm ); + + /* Merge any states that are awaiting merging. This will likey cause other + * states to be added to the NFA list. */ + while ( true ) { + if ( fillAbort( res, fsm ) ) + return res; + + if ( fsm->nfaList.length() == 0 ) + break; + + StateAp *state = fsm->nfaList.head; + + StateSet *stateSet = &state->stateDictEl->stateSet; + fsm->mergeStateList( state, stateSet->data, stateSet->length() ); + + for ( StateSet::Iter s = *stateSet; s.lte(); s++ ) + fsm->detachStateDict( state, *s ); + + fsm->nfaList.detach( state ); + } + + /* The NFA list is empty at this point. There are no state sets we need to + * preserve. */ + + /* Disassociated state dict elements from states. */ + for ( StateDict::Iter sdi = fsm->stateDict; sdi.lte(); sdi++ ) + sdi->targState->stateDictEl = 0; + + /* Delete all the state dict elements. */ + fsm->stateDict.empty(); + + return res; +} + +/* Check if a machine defines a single character. This is useful in validating + * ranges and machines to export. */ +bool FsmAp::checkSingleCharMachine() +{ + /* Must have two states. */ + if ( stateList.length() != 2 ) + return false; + /* The start state cannot be final. 
*/ + if ( startState->isFinState() ) + return false; + /* There should be only one final state. */ + if ( finStateSet.length() != 1 ) + return false; + /* The final state cannot have any transitions out. */ + if ( finStateSet[0]->outList.length() != 0 ) + return false; + /* The start state should have only one transition out. */ + if ( startState->outList.length() != 1 ) + return false; + /* The singe transition out of the start state should not be a range. */ + TransAp *startTrans = startState->outList.head; + if ( ctx->keyOps->ne( startTrans->lowKey, startTrans->highKey ) ) + return false; + return true; +} + +FsmRes FsmAp::condCostFromState( FsmAp *fsm, StateAp *state, long depth ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->stateBits & STB_ONLIST ) + return FsmRes( FsmRes::Fsm(), fsm ); + + if ( depth > fsm->ctx->condsCheckDepth ) + return FsmRes( FsmRes::Fsm(), fsm ); + + /* Doing depth first, put state on the list. */ + state->stateBits |= STB_ONLIST; + + /* Recurse on everything ranges. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) { + FsmRes res = condCostFromState( fsm, trans->tdap()->toState, depth + 1 ); + if ( !res.success() ) + return res; + } + } + else { + for ( CondSet::Iter csi = trans->condSpace->condSet; csi.lte(); csi++ ) { + if ( (*csi)->costMark ) + return FsmRes( FsmRes::CondCostTooHigh(), (*csi)->costId ); + } + + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) { + FsmRes res = condCostFromState( fsm, cond->toState, depth + 1 ); + if ( !res.success() ) + return res; + } + } + } + } + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *state->nfaOut; n.lte(); n++ ) { + /* We do not increment depth here since this is an epsilon transition. 
*/ + FsmRes res = condCostFromState( fsm, n->toState, depth ); + if ( !res.success() ) + return res; + } + } + + for ( ActionTable::Iter a = state->fromStateActionTable; a.lte(); a++ ) { + if ( a->value->costMark ) + return FsmRes( FsmRes::CondCostTooHigh(), a->value->costId ); + } + + return FsmRes( FsmRes::Fsm(), fsm ); +} + + +/* Returns either success (using supplied fsm), or some error condition. */ +FsmRes FsmAp::condCostSearch( FsmAp *fsm ) +{ + /* Init on state list flags. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) + st->stateBits &= ~STB_ONLIST; + + FsmRes res = condCostFromState( fsm, fsm->startState, 1 ); + if ( !res.success() ) + delete fsm; + return res; +} + +void FsmAp::condCost( Action *action, long repId ) +{ + action->costMark = true; + action->costId = repId; +} + +/* + * This algorithm assigns a price to each state visit, then adds that to a + * running total. Note that we do not guard against multiple visits to a state, + * since we are estimating runtime cost. + * + * We rely on a character histogram and are looking for a probability of being + * in any given state, given that histogram, simple and very effective. + */ +void FsmAp::breadthFromState( double &total, int &minDepth, double *histogram, + FsmAp *fsm, StateAp *state, long depth, int maxDepth, double stateScore ) +{ + if ( depth > maxDepth ) + return; + + /* Recurse on everything ranges. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + + /* Compute target state score. */ + double span = 0; + for ( int i = trans->lowKey.getVal(); i <= trans->highKey.getVal(); i++ ) + span += histogram[i]; + + double targetStateScore = stateScore * ( span ); + + /* Add to the level. 
*/ + total += targetStateScore; + + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) { + if ( trans->tdap()->toState->isFinState() && ( minDepth < 0 || depth < minDepth ) ) + minDepth = depth; + + breadthFromState( total, minDepth, histogram, fsm, trans->tdap()->toState, + depth + 1, maxDepth, targetStateScore ); + } + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) { + if ( cond->toState->isFinState() && ( minDepth < 0 || depth < minDepth ) ) + minDepth = depth; + + breadthFromState( total, minDepth, histogram, fsm, cond->toState, + depth + 1, maxDepth, targetStateScore ); + } + } + } + } + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *state->nfaOut; n.lte(); n++ ) { + if ( n->toState->isFinState() && ( minDepth < 0 || depth < minDepth ) ) + minDepth = depth; + + /* We do not increment depth here since this is an epsilon transition. */ + breadthFromState( total, minDepth, histogram, fsm, n->toState, depth, maxDepth, stateScore ); + } + } +} + +void FsmAp::breadthFromEntry( double &total, int &minDepth, double *histogram, FsmAp *fsm, StateAp *state ) +{ + long depth = 1; + int maxDepth = 5; + double stateScore = 1.0; + + FsmAp::breadthFromState( total, minDepth, histogram, fsm, state, depth, maxDepth, stateScore ); +} + + +void FsmAp::applyEntryPriorGuard( FsmAp *fsm, long repId ) +{ + PriorDesc *priorDesc0 = fsm->ctx->allocPriorDesc(); + PriorDesc *priorDesc1 = fsm->ctx->allocPriorDesc(); + + priorDesc0->key = fsm->ctx->nextPriorKey; + priorDesc0->priority = 0; + priorDesc0->guarded = true; + priorDesc0->guardId = repId; + priorDesc0->other = priorDesc1; + + priorDesc1->key = fsm->ctx->nextPriorKey; + priorDesc1->priority = 1; + priorDesc1->guarded = true; + priorDesc1->guardId = repId; + priorDesc1->other = priorDesc0; + + /* Roll over for next allocation. */ + fsm->ctx->nextPriorKey += 1; + + /* Only need to set the first. Second is referenced using 'other' field. 
*/ + fsm->startState->guardedInTable.setPrior( 0, priorDesc0 ); +} + +void FsmAp::applyRepeatPriorGuard( FsmAp *fsm, long repId ) +{ + PriorDesc *priorDesc2 = fsm->ctx->allocPriorDesc(); + PriorDesc *priorDesc3 = fsm->ctx->allocPriorDesc(); + + priorDesc2->key = fsm->ctx->nextPriorKey; + priorDesc2->priority = 0; + priorDesc2->guarded = true; + priorDesc2->guardId = repId; + priorDesc2->other = priorDesc3; + + priorDesc3->key = fsm->ctx->nextPriorKey; + priorDesc3->guarded = true; + priorDesc3->priority = 1; + priorDesc3->guardId = repId; + priorDesc3->other = priorDesc2; + + /* Roll over for next allocation. */ + fsm->ctx->nextPriorKey += 1; + + /* Only need to set the first. Second is referenced using 'other' field. */ + fsm->startState->guardedInTable.setPrior( 0, priorDesc2 ); + + fsm->allTransPrior( fsm->ctx->curPriorOrd++, priorDesc3 ); + fsm->leaveFsmPrior( fsm->ctx->curPriorOrd++, priorDesc2 ); +} + +FsmRes FsmAp::condPlus( FsmAp *fsm, long repId, Action *ini, Action *inc, Action *min, Action *max ) +{ + condCost( ini, repId ); + condCost( inc, repId ); + condCost( min, repId ); + if ( max != 0 ) + condCost( max, repId ); + + fsm->startFsmAction( 0, inc ); + + if ( max != 0 ) { + FsmRes res = fsm->startFsmCondition( max, true ); + if ( !res.success() ) + return res; + } + + /* Need a duplicated for the star end. */ + FsmAp *dup = new FsmAp( *fsm ); + + applyRepeatPriorGuard( dup, repId ); + + /* Star the duplicate. */ + FsmRes dupStar = FsmAp::starOp( dup ); + if ( !dupStar.success() ) { + delete fsm; + return dupStar; + } + + FsmRes res = FsmAp::concatOp( fsm, dupStar.fsm ); + if ( !res.success() ) + return res; + + /* End plus operation. */ + + res.fsm->leaveFsmCondition( min, true ); + + /* Init action. */ + res.fsm->startFromStateAction( 0, ini ); + + /* Leading priority guard. 
*/ + applyEntryPriorGuard( res.fsm, repId ); + + return res; +} + +FsmRes FsmAp::condStar( FsmAp *fsm, long repId, Action *ini, Action *inc, Action *min, Action *max ) +{ + condCost( ini, repId ); + condCost( inc, repId ); + condCost( min, repId ); + if ( max != 0 ) + condCost( max, repId ); + + /* Increment. */ + fsm->startFsmAction( 0, inc ); + + /* Max (optional). */ + if ( max != 0 ) { + FsmRes res = fsm->startFsmCondition( max, true ); + if ( !res.success() ) + return res; + } + + applyRepeatPriorGuard( fsm, repId ); + + /* Star. */ + FsmRes res = FsmAp::starOp( fsm ); + if ( !res.success() ) + return res; + + /* Restrict leaving. */ + res.fsm->leaveFsmCondition( min, true ); + + /* Init action. */ + res.fsm->startFromStateAction( 0, ini ); + + /* Leading priority guard. */ + applyEntryPriorGuard( res.fsm, repId ); + + return res; +} + +/* Remove duplicates of unique actions from an action table. */ +void FsmAp::removeDups( ActionTable &table ) +{ + /* Scan through the table looking for unique actions to + * remove duplicates of. */ + for ( int i = 0; i < table.length(); i++ ) { + /* Remove any duplicates ahead of i. */ + for ( int r = i+1; r < table.length(); ) { + if ( table[r].value == table[i].value ) + table.vremove(r); + else + r += 1; + } + } +} + +/* Remove duplicates from action lists. This operates only on transition and + * eof action lists and so should be called once all actions have been + * transfered to their final resting place. */ +void FsmAp::removeActionDups() +{ + /* Loop all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Loop all transitions. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) + removeDups( trans->tdap()->actionTable ); + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) + removeDups( cond->actionTable ); + } + } + removeDups( state->toStateActionTable ); + removeDups( state->fromStateActionTable ); + removeDups( state->eofActionTable ); + } +} + diff --git a/ragel/fsmgraph.h b/ragel/fsmgraph.h new file mode 100644 index 00000000..2429d923 --- /dev/null +++ b/ragel/fsmgraph.h @@ -0,0 +1,2541 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _FSMGRAPH_H +#define _FSMGRAPH_H + +#include "config.h" +#include "ragel.h" +#include "common.h" +#include "vector.h" +#include "bstset.h" +#include "compare.h" +#include "avltree.h" +#include "dlist.h" +#include "dlistmel.h" +#include "bstmap.h" +#include "sbstmap.h" +#include "sbstset.h" +#include "sbsttable.h" +#include "avlset.h" +#include "avlmap.h" + +#include <assert.h> +#include <iostream> +#include <sstream> +#include <string> + + +/* Flags that control merging. Each flag is a distinct bit; STB_BOTH is the union of STB_GRAPH1 and STB_GRAPH2. */ +#define STB_GRAPH1 0x01 +#define STB_GRAPH2 0x02 +#define STB_BOTH 0x03 +#define STB_ISFINAL 0x04 +#define STB_ISMARKED 0x08 +#define STB_ONLIST 0x10 +#define STB_NFA_REP 0x20 + +using std::ostream; + +struct TransAp; +struct StateAp; +struct FsmAp; +struct Action; +struct LongestMatchPart; +struct LengthDef; +struct CondSpace; +struct FsmCtx; +struct InlineBlock; +struct InlineList; + +/* Empty tag type; presumably thrown when a state limit is exceeded -- confirm at the throw site. */ struct TooManyStates {}; + +/* Carries the id passed at construction. */ struct PriorInteraction +{ + PriorInteraction( long long id ) : id(id) {} + long long id; +}; + +/* Pairs a depth with a group count. */ struct NfaRound +{ + NfaRound( long depth, long groups ) + : depth(depth), groups(groups) {} + + long depth; + long groups; +}; + +typedef Vector<NfaRound> NfaRoundVect; + +/* Carries the cost id passed at construction. */ struct CondCostTooHigh +{ + CondCostTooHigh( long long costId ) + : costId(costId) {} + + long long costId; +}; + + +/* State list element for unambiguous access to list element. */ +struct FsmListEl +{ + StateAp *prev, *next; +}; + +/* This is the marked index for a state pair. Used in minimization. It keeps + * track of whether or not the state pair is marked. */ +struct MarkIndex +{ + MarkIndex(int states); + ~MarkIndex(); + + void markPair(int state1, int state2); + bool isPairMarked(int state1, int state2); + +private: + int numStates; + bool *array; +}; + +/* Transition Action Element. */ +typedef SBstMapEl< int, Action* > ActionTableEl; + +/* Nodes in the tree that use this action. 
*/ +struct NameInst; +struct InlineList; +typedef Vector<NameInst*> NameInstVect; + +struct ActionParam +{ + ActionParam( std::string name ) + : name(name) {} + + std::string name; +}; + +typedef Vector<ActionParam*> ActionParamList; + +typedef Vector<Action*> ActionArgList; + +struct CmpActionArgList +{ + static inline int compare( const ActionArgList *list1, const ActionArgList *list2 ) + { + return CmpTable<Action*>::compare( *list1, *list2 ); + } +}; + +typedef BstMap<ActionArgList*, Action*, CmpActionArgList> ActionArgListMap; +typedef BstMapEl<ActionArgList*, Action*> ActionArgListMapEl; + +/* Element in list of actions. Contains the string for the code to exectute. */ +struct Action +: + public DListEl<Action>, + public AvlTreeEl<Action> +{ +public: + + Action( const InputLoc &loc, std::string name, InlineList *inlineList, int condId ) + : + loc(loc), + name(name), + inlineList(inlineList), + actionId(-1), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + numCondRefs(0), + numNfaRefs(0), + anyCall(false), + isLmAction(false), + condId(condId), + costMark(false), + costId(0), + paramList(0), + argListMap(0), + substOf(0), + argList(0) + { + } + + ~Action(); + + static Action *cons( const InputLoc &loc, Action *substOf, + ActionArgList *argList, int condId ) + { + Action *action = new Action( loc, std::string(), 0, condId ); + action->substOf = substOf; + action->argList = argList; + action->inlineList = substOf->inlineList; + return action; + } + + /* Key for action dictionary. */ + std::string getKey() const { return name; } + + /* Data collected during parse. */ + InputLoc loc; + std::string name; + InlineList *inlineList; + int actionId; + + void actionName( ostream &out ) + { + if ( name.empty() ) + out << loc.line << ":" << loc.col; + else + out << name; + } + + /* Nodes in the name tree where the action is embedded. This serves as the + * root for name searches. 
Since actions can be used multiple times we use + * a vector. Name resolver deals with contracts. */ + NameInstVect embedRoots; + + /* Number of references in the final machine. */ + int numRefs() + { + return numTransRefs + numToStateRefs + + numFromStateRefs + numEofRefs + + numNfaRefs; + } + + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + int numCondRefs; + int numNfaRefs; + bool anyCall; + + bool isLmAction; + int condId; + + bool costMark; + long long costId; + + ActionParamList *paramList; + ActionArgListMap *argListMap; + Action *substOf; + ActionArgList *argList; +}; + +struct CmpCondId +{ + static inline int compare( const Action *cond1, const Action *cond2 ) + { + if ( cond1->condId < cond2->condId ) + return -1; + else if ( cond1->condId > cond2->condId ) + return 1; + return 0; + } +}; + +/* A list of actions. */ +typedef DList<Action> ActionList; +typedef AvlTree<Action, std::string, CmpString> ActionDict; + +/* Structure for reverse action mapping. */ +struct RevActionMapEl +{ + char *name; + InputLoc location; +}; + + +/* Transition Action Table. */ +struct ActionTable + : public SBstMap< int, Action*, CmpOrd<int> > +{ + void setAction( int ordering, Action *action ); + void setActions( int *orderings, Action **actions, int nActs ); + void setActions( const ActionTable &other ); + + bool hasAction( Action *action ); +}; + +typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet; +typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet; + +/* Transistion Action Element. */ +typedef SBstMapEl< int, LongestMatchPart* > LmActionTableEl; + +/* Transition Action Table. */ +struct LmActionTable + : public SBstMap< int, LongestMatchPart*, CmpOrd<int> > +{ + void setAction( int ordering, LongestMatchPart *action ); + void setActions( const LmActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). 
*/ +struct CmpActionTableEl +{ + static int compare( const ActionTableEl &action1, + const ActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for ActionTable. */ +typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable; + +/* Compare of a whole lm action table element (key & value). */ +struct CmpLmActionTableEl +{ + static int compare( const LmActionTableEl &lmAction1, + const LmActionTableEl &lmAction2 ) + { + if ( lmAction1.key < lmAction2.key ) + return -1; + else if ( lmAction1.key > lmAction2.key ) + return 1; + else if ( lmAction1.value < lmAction2.value ) + return -1; + else if ( lmAction1.value > lmAction2.value ) + return 1; + return 0; + } +}; + +/* Compare for ActionTable. */ +typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable; + +/* Action table element for error action tables. Adds the encoding of transfer + * point. */ +struct ErrActionTableEl +{ + ErrActionTableEl( Action *action, int ordering, int transferPoint ) + : ordering(ordering), action(action), transferPoint(transferPoint) { } + + /* Ordering and id of the action embedding. */ + int ordering; + Action *action; + + /* Id of point of transfere from Error action table to transtions and + * eofActionTable. */ + int transferPoint; + + int getKey() const { return ordering; } +}; + +struct ErrActionTable + : public SBstTable< ErrActionTableEl, int, CmpOrd<int> > +{ + void setAction( int ordering, Action *action, int transferPoint ); + void setActions( const ErrActionTable &other ); +}; + +/* Compare of an error action table element (key & value). 
*/ +struct CmpErrActionTableEl +{ + static int compare( const ErrActionTableEl &action1, + const ErrActionTableEl &action2 ) + { + if ( action1.ordering < action2.ordering ) + return -1; + else if ( action1.ordering > action2.ordering ) + return 1; + else if ( action1.action < action2.action ) + return -1; + else if ( action1.action > action2.action ) + return 1; + else if ( action1.transferPoint < action2.transferPoint ) + return -1; + else if ( action1.transferPoint > action2.transferPoint ) + return 1; + return 0; + } +}; + +/* Compare for ErrActionTable. */ +typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable; + + +/* Descibe a priority, shared among PriorEls. + * Has key and whether or not used. */ +struct PriorDesc +{ + PriorDesc() + : + key(0), + priority(0), + guarded(false), + guardId(0), + other(0) + {} + + int key; + int priority; + bool guarded; + long long guardId; + PriorDesc *other; + + PriorDesc *prev, *next; +}; + +typedef DList<PriorDesc> PriorDescList; + +/* Element in the arrays of priorities for transitions and arrays. Ordering is + * unique among instantiations of machines, desc is shared. */ +struct PriorEl +{ + PriorEl( int ordering, PriorDesc *desc ) + : ordering(ordering), desc(desc) { } + + int ordering; + PriorDesc *desc; +}; + +/* Compare priority elements, which are ordered by the priority descriptor + * key. */ +struct PriorElCmp +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc->key < pel2.desc->key ) + return -1; + else if ( pel1.desc->key > pel2.desc->key ) + return 1; + else + return 0; + } +}; + + +/* Priority Table. */ +struct PriorTable + : public SBstSet< PriorEl, PriorElCmp > +{ + void setPrior( int ordering, PriorDesc *desc ); + void setPriors( const PriorTable &other ); +}; + +/* Compare of prior table elements for distinguising state data. 
*/ +struct CmpPriorEl +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc < pel2.desc ) + return -1; + else if ( pel1.desc > pel2.desc ) + return 1; + else if ( pel1.ordering < pel2.ordering ) + return -1; + else if ( pel1.ordering > pel2.ordering ) + return 1; + return 0; + } +}; + +/* Compare of PriorTable distinguising state data. Using a compare of the + * pointers is a little more strict than it needs be. It requires that + * prioritiy tables have the exact same set of priority assignment operators + * (from the input lang) to be considered equal. + * + * Really only key-value pairs need be tested and ordering be merged. However + * this would require that in the fuseing of states, priority descriptors be + * chosen for the new fused state based on priority. Since the out transition + * lists and ranges aren't necessarily going to line up, this is more work for + * little gain. Final compression resets all priorities first, so this would + * only be useful for compression at every operator, which is only an + * undocumented test feature. + */ +typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable; + +/* Plain action list that imposes no ordering. */ +typedef Vector<int> TransFuncList; + +/* Comparison for TransFuncList. */ +typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare; + +/* In transition list. Like DList except only has head pointers, which is all + * that is required. Insertion and deletion is handled by the graph. This class + * provides the iterator of a single list. */ +template <class Element> struct InList +{ + InList() : head(0) { } + + Element *head; + + struct Iter + { + /* Default construct. */ + Iter() : ptr(0) { } + + /* Construct, assign from a list. 
*/ + Iter( const InList &il ) : ptr(il.head) { } + Iter &operator=( const InList &dl ) { ptr = dl.head; return *this; } + + /* At the end */ + bool lte() const { return ptr != 0; } + bool end() const { return ptr == 0; } + + /* At the first, last element. */ + bool first() const { return ptr && ptr->ilprev == 0; } + bool last() const { return ptr && ptr->ilnext == 0; } + + /* Cast, dereference, arrow ops. */ + operator Element*() const { return ptr; } + Element &operator *() const { return *ptr; } + Element *operator->() const { return ptr; } + + /* Increment, decrement. */ + inline void operator++(int) { ptr = ptr->ilnext; } + inline void operator--(int) { ptr = ptr->ilprev; } + + /* The iterator is simply a pointer. */ + Element *ptr; + }; +}; + +struct TransData +{ + TransData() + : + fromState(0), toState(0) + {} + + TransData( const TransData &other ) + : + fromState(0), toState(0), + actionTable(other.actionTable), + priorTable(other.priorTable), + lmActionTable(other.lmActionTable) + { + } + + StateAp *fromState; + StateAp *toState; + + /* The function table and priority for the transition. */ + ActionTable actionTable; + PriorTable priorTable; + + LmActionTable lmActionTable; +}; + + +/* The element for the sub-list within a TransAp. These specify the transitions + * and are keyed by the condition expressions. */ +struct CondAp + : public TransData +{ + CondAp( TransAp *transAp ) + : + TransData(), + transAp(transAp), + key(0) + {} + + CondAp( const CondAp &other, TransAp *transAp ) + : + TransData( other ), + transAp(transAp), + key(other.key) + { + } + + /* Owning transition. */ + TransAp *transAp; + + CondKey key; + + /* Pointers for outlist. */ + CondAp *prev, *next; + + /* Pointers for in-list. */ + CondAp *ilprev, *ilnext; +}; + +typedef DList<CondAp> CondList; + +struct TransCondAp; +struct TransDataAp; + +/* Transition class that implements actions and priorities. 
*/ +struct TransAp +{ + TransAp() + : condSpace(0) {} + + TransAp( const TransAp &other ) + : + lowKey(other.lowKey), + highKey(other.highKey), + condSpace(other.condSpace) + { + } + + ~TransAp() + { + // delete condList.head; + // condList.abandon(); + } + + bool plain() const + { return condSpace == 0; } + + TransCondAp *tcap(); + TransDataAp *tdap(); + + long condFullSize(); + + Key lowKey, highKey; + + /* Which conditions are tested on this range. */ + CondSpace *condSpace; + + /* Pointers for outlist. */ + TransAp *prev, *next; +}; + +struct TransCondAp + : public TransAp +{ + TransCondAp() + : + TransAp() + {} + + TransCondAp( const TransCondAp &other ) + : + TransAp( other ), + condList() + {} + + ~TransCondAp() + { + condList.empty(); + } + + /* Cond trans list. Sorted by key value. */ + CondList condList; +}; + +struct TransDataAp + : public TransAp, public TransData +{ + TransDataAp() + : + TransAp(), + TransData() + {} + + TransDataAp( const TransDataAp &other ) + : + TransAp( other ), + TransData( other ) + {} + + /* Pointers for in-list. */ + TransDataAp *ilprev, *ilnext; +}; + +inline TransCondAp *TransAp::tcap() + { return this->condSpace != 0 ? static_cast<TransCondAp*>( this ) : 0; } + +inline TransDataAp *TransAp::tdap() + { return this->condSpace == 0 ? static_cast<TransDataAp*>( this ) : 0; } + +typedef DList<TransAp> TransList; + +/* Need the base vector type for accessing underlying remove function. */ +typedef BstSet<int> CondKeySet; +typedef Vector<int> CondKeyVect; + +/* State class that implements actions and priorities. 
*/ + +struct NfaActions +{ + NfaActions( Action *push, Action *pop, int order ) + : push(push), pop(pop), order(order) {} + + Action *push; + Action *pop; + + int order; + + ActionTable pushTable; + ActionTable popTable; +}; + +struct NfaTrans +{ + NfaTrans( int order ) + : + fromState(0), + toState(0), + order(order), + popCondSpace(0) + { + } + + NfaTrans( const ActionTable &pushTable, + const ActionTable &restoreTable, + const ActionTable &popFrom, + CondSpace *popCondSpace, + const CondKeySet popCondKeys, + const ActionTable &popAction, + const ActionTable &popTable, + int order ) + : + fromState(0), toState(0), + order(order), + pushTable(pushTable), + restoreTable(restoreTable), + popFrom(popFrom), + popCondSpace(popCondSpace), + popCondKeys(popCondKeys), + popAction(popAction), + popTest(popTable) + {} + + NfaTrans( const NfaTrans &other ) + : + fromState(0), toState(0), + order(other.order), + pushTable(other.pushTable), + restoreTable(other.restoreTable), + popCondSpace(other.popCondSpace), + popCondKeys(other.popCondKeys), + popAction(other.popAction), + popTest(other.popTest), + priorTable(other.priorTable) + {} + + + StateAp *fromState; + StateAp *toState; + + int order; + + ActionTable pushTable; + ActionTable restoreTable; + + /* + * 1. Conditions transferred (always tested first) + * 2. Actions transferred + * 3. Pop actions created during epsilon draw. + */ + ActionTable popFrom; + CondSpace *popCondSpace; + CondKeySet popCondKeys; + + ActionTable popAction; + ActionTable popTest; + + PriorTable priorTable; + + NfaTrans *prev, *next; + NfaTrans *ilprev, *ilnext; +}; + + +typedef BstMap<StateAp*, NfaActions> NfaStateMap; +typedef BstMapEl<StateAp*, NfaActions> NfaStateMapEl; + +typedef DList<NfaTrans> NfaTransList; +typedef InList<NfaTrans> NfaInList; + +struct CmpNfaTrans +{ + static int compare( NfaTrans *t1, NfaTrans *t2 ) + { + /* This comparison is too strong. (okay to use something too strong -- + * we just don't find minimal). 
* */ + if ( t1->toState < t2->toState ) + return -1; + else if ( t1->toState > t2->toState ) + return 1; + else if ( t1->order < t2->order ) + return -1; + else if ( t1->order > t2->order ) + return 1; + else + { + int r = CmpActionTable::compare( t1->pushTable, t2->pushTable ); + if ( r != 0 ) + return r; + + r = CmpActionTable::compare( t1->restoreTable, t2->restoreTable ); + if ( r != 0 ) + return r; + + if ( t1->popCondSpace < t2->popCondSpace ) + return -1; + else if ( t1->popCondSpace > t2->popCondSpace ) + return 1; + + r = CmpTable<int>::compare( t1->popCondKeys, t2->popCondKeys ); + if ( r != 0 ) + return r; + + r = CmpActionTable::compare( t1->popTest, t2->popTest ); + if ( r != 0 ) + return r; + + r = CmpActionTable::compare( t1->popAction, t2->popAction ); + if ( r != 0 ) + return r; + } + + return 0; + } +}; + +struct CmpNfaTransList +{ + static int compare( const NfaTransList &l1, const NfaTransList &l2 ) + { + if ( l1.length() < l2.length() ) + return -1; + else if ( l1.length() > l2.length() ) + return 1; + else { + NfaTransList::Iter i1 = l1; + NfaTransList::Iter i2 = l2; + while ( i1.lte() ) { + int r = CmpNfaTrans::compare( i1, i2 ); + if ( r != 0 ) + return r; + i1++, i2++; + } + } + return 0; + } +}; + +struct CmpNfaStateMapEl +{ + static int compare( const NfaStateMapEl &el1, const NfaStateMapEl &el2 ) + { + if ( el1.key < el2.key ) + return -1; + else if ( el1.key > el2.key ) + return 1; + else if ( el1.value.push < el2.value.push ) + return -1; + else if ( el1.value.push > el2.value.push ) + return 1; + else if ( el1.value.pop < el2.value.pop ) + return -1; + else if ( el1.value.pop > el2.value.pop ) + return 1; + else if ( el1.value.order < el2.value.order ) + return -1; + else if ( el1.value.order > el2.value.order ) + return 1; + return 0; + } +}; + +/* Set of states, list of states. */ +typedef BstSet<StateAp*> StateSet; +typedef DList<StateAp> StateList; + +/* A element in a state dict. 
*/ +struct StateDictEl +: + public AvlTreeEl<StateDictEl> +{ + StateDictEl(const StateSet &stateSet) + : stateSet(stateSet) { } + + const StateSet &getKey() { return stateSet; } + StateSet stateSet; + StateAp *targState; +}; + +/* Dictionary mapping a set of states to a target state. */ +typedef AvlTree< StateDictEl, StateSet, CmpTable<StateAp*> > StateDict; + +struct TransEl +{ + /* Constructors. */ + TransEl() { } + TransEl( Key lowKey, Key highKey ) + : lowKey(lowKey), highKey(highKey) { } + TransEl( Key lowKey, Key highKey, TransAp *value ) + : lowKey(lowKey), highKey(highKey), value(value) { } + + Key lowKey, highKey; + TransAp *value; +}; + +struct CmpKey +{ + CmpKey() + : keyOps(0) {} + + KeyOps *keyOps; + + int compare( const Key key1, const Key key2 ) + { + if ( keyOps->lt( key1, key2 ) ) + return -1; + else if ( keyOps->gt( key1, key2 ) ) + return 1; + else + return 0; + } +}; + +/* Vector based set of key items. */ +struct KeySet +: + public BstSet<Key, CmpKey> +{ + KeySet( KeyOps *keyOps ) + { + CmpKey::keyOps = keyOps; + } +}; + +struct MinPartition +{ + MinPartition() : active(false) { } + + StateList list; + bool active; + + MinPartition *prev, *next; +}; + +/* Epsilon transition stored in a state. Specifies the target */ +typedef Vector<int> EpsilonTrans; + +/* List of states that are to be drawn into this. */ +struct EptVectEl +{ + EptVectEl( StateAp *targ, bool leaving ) + : targ(targ), leaving(leaving) { } + + StateAp *targ; + bool leaving; +}; +typedef Vector<EptVectEl> EptVect; + +/* Set of entry ids that go into this state. */ +typedef BstSet<int> EntryIdSet; + +/* Set of longest match items that may be active in a given state. */ +typedef BstSet<LongestMatchPart*> LmItemSet; + +/* A Conditions which is to be + * transfered on pending out transitions. 
*/ +struct OutCond +{ + OutCond( Action *action, bool sense ) + : action(action), sense(sense) {} + + Action *action; + bool sense; +}; + +/* Three-way comparison by action pointer, then by sense. */ struct CmpOutCond +{ + static int compare( const OutCond &outCond1, const OutCond &outCond2 ) + { + if ( outCond1.action < outCond2.action ) + return -1; + else if ( outCond1.action > outCond2.action ) + return 1; + else if ( outCond1.sense < outCond2.sense ) + return -1; + else if ( outCond1.sense > outCond2.sense ) + return 1; + return 0; + } +}; + +/* Conditions. */ +typedef BstSet< Action*, CmpCondId > CondSet; +typedef CmpTable< Action*, CmpCondId > CmpCondSet; + +struct CondSpace + : public AvlTreeEl<CondSpace> +{ + CondSpace( const CondSet &condSet ) + : condSet(condSet) {} + + const CondSet &getKey() { return condSet; } + + /* Two to the power of the number of conditions in the set. NOTE(review): the shift is done on an int literal before widening to long. */ long fullSize() + { return ( 1 << condSet.length() ); } + + CondSet condSet; + long condSpaceId; +}; + +typedef Vector<CondSpace*> CondSpaceVect; + +typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap; + +typedef Vector<long> LongVect; + +struct CondData +{ + CondSpaceMap condSpaceMap; + + ~CondData() + { + condSpaceMap.empty(); + } +}; + +struct FsmGbl +{ + FsmGbl( const HostLang *hostLang ) + : + printStatistics(false), + errorCount(0), + displayPrintables(false), + hostLang(hostLang), + stringTables(false), + checkPriorInteraction(0), + wantDupsRemoved(true), + minimizeLevel(MinimizePartition2), + minimizeOpt(MinimizeMostOps) + {} + + bool printStatistics; + + /* + * Error reporting. + */ + + /* PROGNAME: txt */ + std::ostream &error(); + + /* file:loc: txt */ + std::ostream &error( const InputLoc &loc ); + + /* txt */ + std::ostream &error_plain(); + + /* file:loc: warning: txt */ + std::ostream &warning( const InputLoc &loc ); + + /* Stats reporting. */ + std::ostream &stats(); + + /* Requested info. 
*/ + std::ostream &info(); + + std::stringstream libcerr; + std::stringstream libcout; + + int errorCount; + void abortCompile( int code ); + bool displayPrintables; + + const HostLang *hostLang; + bool stringTables; + bool checkPriorInteraction; + bool wantDupsRemoved; + + MinimizeLevel minimizeLevel; + MinimizeOpt minimizeOpt; +}; + +/* All FSM operations must be between machines that have been created using the + * same context object. */ +struct FsmCtx +{ + FsmCtx( FsmGbl *fsmGbl ); + ~FsmCtx(); + + KeyOps *keyOps; + CondData *condData; + MinimizeLevel minimizeLevel; + MinimizeOpt minimizeOpt; + + static const int STATE_UNLIMITED = 0; + + long stateLimit; + bool printStatistics; + bool checkPriorInteraction; + + bool unionOp; + + long condsCheckDepth; + + /* Counting the action and priority ordering. */ + int curActionOrd; + int curPriorOrd; + + int nextPriorKey; + int nextCondId; + + /* Allocates a descriptor and appends it to priorDescList before returning it. */ PriorDesc *allocPriorDesc() + { + PriorDesc *priorDesc = new PriorDesc(); + priorDescList.append( priorDesc ); + return priorDesc; + } + + PriorDescList priorDescList; + + FsmGbl *fsmGbl; + + /* List of actions. Will be pasted into a switch statement. */ + ActionList actionList; + + ExportList exportList; + + bool generatingSectionSubset; + bool lmRequiresErrorState; + + /* Make name ids to name inst pointers. */ + NameInst **nameIndex; + + /* Element type and get key expression. */ + InlineList *getKeyExpr; + InlineList *accessExpr; + + /* Stack management */ + InlineBlock *prePushExpr; + InlineBlock *postPopExpr; + + /* Nfa stack management. */ + InlineBlock *nfaPrePushExpr; + InlineBlock *nfaPostPopExpr; + + /* Overriding variables. 
*/ + InlineList *pExpr; + InlineList *peExpr; + InlineList *eofExpr; + InlineList *csExpr; + InlineList *topExpr; + InlineList *stackExpr; + InlineList *actExpr; + InlineList *tokstartExpr; + InlineList *tokendExpr; + InlineList *dataExpr; + + Action *newNfaWrapAction( const char *name, InlineList *inlineList, Action *optWrap ); + void createNfaActions( FsmAp *fsm ); + + /* Checking the contents of actions. */ + void checkAction( Action *action ); + void checkInlineList( Action *act, InlineList *inlineList ); + + void analyzeAction( Action *action, InlineList *inlineList ); + void analyzeGraph( FsmAp *graph ); + + void finalizeInstance( FsmAp *graph ); + void prepareReduction( FsmAp *sectionGraph ); +}; + +typedef InList<CondAp> CondInList; +typedef InList<TransDataAp> TransInList; + +struct NfaStateEl +{ + StateAp *prev, *next; +}; + +typedef DListMel<StateAp, NfaStateEl> NfaStateList; + +struct StateAp + : public NfaStateEl +{ + StateAp(); + StateAp(const StateAp &other); + ~StateAp(); + + /* Is the state final? */ + bool isFinState() { return stateBits & STB_ISFINAL; } + + /* Out transition list and the pointer for the default out trans. */ + TransList outList; + + /* In transition Lists. */ + TransInList inTrans; + CondInList inCond; + + /* Set only during scanner construction when actions are added. NFA to DFA + * code can ignore this. */ + StateAp *eofTarget; + + /* Entry points into the state. */ + EntryIdSet entryIds; + + /* Epsilon transitions. */ + EpsilonTrans epsilonTrans; + + /* Number of in transitions from states other than ourselves. */ + int foreignInTrans; + + /* Temporary data for various algorithms. */ + union { + /* When duplicating the fsm we need to map each + * state to the new state representing it. */ + StateAp *stateMap; + + /* When minimizing machines by partitioning, this maps to the group + * the state is in. */ + MinPartition *partition; + + /* Identification for printing and stable minimization. 
*/ + int stateNum; + + } alg; + + /* Data used in epsilon operation, maybe fit into alg? */ + StateAp *isolatedShadow; + int owningGraph; + + /* A pointer to a dict element that contains the set of states this state + * represents. This cannot go into alg, because alg.next is used during + * the merging process. */ + StateDictEl *stateDictEl; + StateSet *stateDictIn; + + NfaTransList *nfaOut; + NfaInList *nfaIn; + + /* When drawing epsilon transitions, holds the list of states to merge + * with. */ + EptVect *eptVect; + + /* Bits controlling the behaviour of the state during collapsing to dfa. */ + int stateBits; + + /* State list elements. */ + StateAp *next, *prev; + + /* + * Priority and Action data. + */ + + /* Out priorities transferred to out transitions. */ + PriorTable outPriorTable; + + /* The following two action tables are distinguished by the fact that when + * toState actions are executed immediately after transition actions of + * incoming transitions and the current character will be the same as the + * one available then. The fromState actions are executed immediately + * before the transition actions of outgoing transitions and the current + * character is same as the one available then. */ + + /* Actions to execute upon entering into a state. */ + ActionTable toStateActionTable; + + /* Actions to execute when going from the state to the transition. */ + ActionTable fromStateActionTable; + + /* Actions to add to any future transitions that leave via this state. */ + ActionTable outActionTable; + + /* Conditions to add to any future transitions that leave via this state. */ + CondSpace *outCondSpace; + CondKeySet outCondKeys; + + /* Error action tables. */ + ErrActionTable errActionTable; + + /* Actions to execute on eof. */ + ActionTable eofActionTable; + + /* Set of longest match items that may be active in this state. */ + LmItemSet lmItemSet; + + PriorTable guardedInTable; + + /* Used by the NFA-based scanner to track the origin of final states. 
We + * only use it in cases where just one match is possible, starting with the + * final state duplicates that are drawn using NFA transitions. */ + LmItemSet lmNfaParts; +}; + +/* Return and re-entry for the co-routine iterators. This should ALWAYS be + * used inside of a block. Records the resume point in itState, returns, and defines the entry<label> target the dispatch switch jumps back to. */ +#define CO_RETURN(label) \ + itState = label; \ + return; \ + entry##label: {} + +/* Return and re-entry for the co-routine iterators. This should ALWAYS be + * used inside of a block. Same as CO_RETURN, but also stores uState in userState before returning. */ +#define CO_RETURN2(label, uState) \ + itState = label; \ + userState = uState; \ + return; \ + entry##label: {} + +template <class Item> struct PiList +{ + PiList() + : ptr(0) {} + + PiList( const DList<Item> &l ) + : ptr(l.head) {} + + PiList( Item *ptr ) + : ptr(ptr) {} + + operator Item *() const { return ptr; } + Item *operator->() const { return ptr; } + + bool end() { return ptr == 0; } + void clear() { ptr = 0; } + + PiList next() + { return PiList( ptr->next ); } + + Item *ptr; +}; + +template <class Item> struct PiSingle +{ + PiSingle() + : ptr(0) {} + + PiSingle( Item *ptr ) + : ptr(ptr) {} + + operator Item *() const { return ptr; } + Item *operator->() const { return ptr; } + + bool end() { return ptr == 0; } + void clear() { ptr = 0; } + + /* Next is always nil. 
*/ + PiSingle next() + { return PiSingle( 0 ); } + + Item *ptr; +}; + +template <class Item> struct PiVector +{ + PiVector() + : ptr(0), length(0) {} + + PiVector( const Vector<Item> &v ) + : ptr(v.data), length(v.length()) {} + + PiVector( Item *ptr, long length ) + : ptr(ptr), length(length) {} + + operator Item *() const { return ptr; } + Item *operator->() const { return ptr; } + + bool end() { return length == 0; } + void clear() { ptr = 0; length = 0; } + + PiVector next() + { return PiVector( ptr + 1, length - 1 ); } + + Item *ptr; + long length; +}; + + +template <class ItemIter1, class ItemIter2 = ItemIter1> struct ValPairIter +{ + /* Encodes the states that are meaningful to the caller of the iterator. 
*/ + enum UserState + { + RangeInS1, RangeInS2, + RangeOverlap, + }; + + /* Encodes the different states that an fsm iterator can be in. */ + enum IterState { + Begin, + ConsumeS1Range, ConsumeS2Range, + OnlyInS1Range, OnlyInS2Range, + ExactOverlap, End + }; + + ValPairIter( const ItemIter1 &list1, const ItemIter2 &list2 ); + + template <class ItemIter> struct NextTrans + { + CondKey key; + ItemIter trans; + ItemIter next; + + NextTrans() { key = 0; } + + void load() { + if ( trans.end() ) + next.clear(); + else { + next = trans->next; + key = trans->key; + } + } + + void set( const ItemIter &t ) { + trans = t; + load(); + } + + void increment() { + trans = next; + load(); + } + }; + + /* Query iterator. */ + bool lte() { return itState != End; } + bool end() { return itState == End; } + void operator++(int) { findNext(); } + void operator++() { findNext(); } + + /* Iterator state. */ + ItemIter1 list1; + ItemIter2 list2; + IterState itState; + UserState userState; + + NextTrans<ItemIter1> s1Tel; + NextTrans<ItemIter2> s2Tel; + Key bottomLow, bottomHigh; + ItemIter1 *bottomTrans1; + ItemIter2 *bottomTrans2; + +private: + void findNext(); +}; + +/* Init the iterator by advancing to the first item. */ +template <class ItemIter1, class ItemIter2> + ValPairIter<ItemIter1, ItemIter2>:: + ValPairIter( const ItemIter1 &list1, const ItemIter2 &list2 ) +: + list1(list1), + list2(list2), + itState(Begin) +{ + findNext(); +} + +/* Advance to the next transition. When returns, trans points to the next + * transition, unless there are no more, in which case end() returns true. */ +template <class ItemIter1, class ItemIter2> + void ValPairIter<ItemIter1, ItemIter2>::findNext() +{ + /* Jump into the iterator routine based on the iterator state. 
*/ + switch ( itState ) { + case Begin: goto entryBegin; + case ConsumeS1Range: goto entryConsumeS1Range; + case ConsumeS2Range: goto entryConsumeS2Range; + case OnlyInS1Range: goto entryOnlyInS1Range; + case OnlyInS2Range: goto entryOnlyInS2Range; + case ExactOverlap: goto entryExactOverlap; + case End: goto entryEnd; + } + +entryBegin: + /* Set up the next structs at the head of the transition lists. */ + s1Tel.set( list1 ); + s2Tel.set( list2 ); + + /* Concurrently scan both out ranges. */ + while ( true ) { + if ( s1Tel.trans.end() ) { + /* We are at the end of state1's ranges. Process the rest of + * state2's ranges. */ + while ( !s2Tel.trans.end() ) { + /* Range is only in s2. */ + CO_RETURN2( ConsumeS2Range, RangeInS2 ); + s2Tel.increment(); + } + break; + } + else if ( s2Tel.trans.end() ) { + /* We are at the end of state2's ranges. Process the rest of + * state1's ranges. */ + while ( !s1Tel.trans.end() ) { + /* Range is only in s1. */ + CO_RETURN2( ConsumeS1Range, RangeInS1 ); + s1Tel.increment(); + } + break; + } + /* Both state1's and state2's transition elements are good. + * The signature of no overlap is a back key being in front of a + * front key. */ + else if ( s1Tel.key < s2Tel.key ) { + /* A range exists in state1 that does not overlap with state2. */ + CO_RETURN2( OnlyInS1Range, RangeInS1 ); + s1Tel.increment(); + } + else if ( s2Tel.key < s1Tel.key ) { + /* A range exists in state2 that does not overlap with state1. */ + CO_RETURN2( OnlyInS2Range, RangeInS2 ); + s2Tel.increment(); + } + else { + /* There is an exact overlap. */ + CO_RETURN2( ExactOverlap, RangeOverlap ); + + s1Tel.increment(); + s2Tel.increment(); + } + } + + /* Done, go into end state. */ + CO_RETURN( End ); +} + +template <class ItemIter1, class ItemIter2 = ItemIter1> struct RangePairIter +{ + /* Encodes the states that are meaningful to the caller of the iterator. 
*/ + enum UserState + { + RangeInS1, RangeInS2, + RangeOverlap, + BreakS1, BreakS2 + }; + + /* Encodes the different states that an fsm iterator can be in. */ + enum IterState { + Begin, + ConsumeS1Range, ConsumeS2Range, + OnlyInS1Range, OnlyInS2Range, + S1SticksOut, S1SticksOutBreak, + S2SticksOut, S2SticksOutBreak, + S1DragsBehind, S1DragsBehindBreak, + S2DragsBehind, S2DragsBehindBreak, + ExactOverlap, End + }; + + RangePairIter( FsmCtx *ctx, const ItemIter1 &list1, const ItemIter2 &list2 ); + + template <class ItemIter> struct NextTrans + { + Key lowKey, highKey; + ItemIter trans; + ItemIter next; + + NextTrans() + { + highKey = 0; + lowKey = 0; + } + + void load() { + if ( trans.end() ) + next.clear(); + else { + next = trans.next(); + lowKey = trans->lowKey; + highKey = trans->highKey; + } + } + + void set( const ItemIter &t ) { + trans = t; + load(); + } + + void increment() { + trans = next; + load(); + } + }; + + /* Query iterator. */ + bool lte() { return itState != End; } + bool end() { return itState == End; } + void operator++(int) { findNext(); } + void operator++() { findNext(); } + + FsmCtx *ctx; + + /* Iterator state. */ + ItemIter1 list1; + ItemIter2 list2; + IterState itState; + UserState userState; + + NextTrans<ItemIter1> s1Tel; + NextTrans<ItemIter2> s2Tel; + Key bottomLow, bottomHigh; + ItemIter1 bottomTrans1; + ItemIter2 bottomTrans2; + +private: + void findNext(); +}; + +/* Init the iterator by advancing to the first item. */ +template <class ItemIter1, class ItemIter2> RangePairIter<ItemIter1, ItemIter2>:: + RangePairIter( FsmCtx *ctx, const ItemIter1 &list1, const ItemIter2 &list2 ) +: + ctx(ctx), + list1(list1), + list2(list2), + itState(Begin) +{ + bottomLow = 0; + bottomHigh = 0; + findNext(); +} + +/* Advance to the next transition. When returns, trans points to the next + * transition, unless there are no more, in which case end() returns true. 
 */ +template <class ItemIter1, class ItemIter2> + void RangePairIter<ItemIter1, ItemIter2>::findNext() +{ + /* Jump into the iterator routine based on the iterator state. The entry + * labels other than entryBegin are not declared in this function body; + * presumably the CO_RETURN/CO_RETURN2 macros (defined outside this chunk) + * provide them and record the state to resume from -- TODO confirm. */ + switch ( itState ) { + case Begin: goto entryBegin; + case ConsumeS1Range: goto entryConsumeS1Range; + case ConsumeS2Range: goto entryConsumeS2Range; + case OnlyInS1Range: goto entryOnlyInS1Range; + case OnlyInS2Range: goto entryOnlyInS2Range; + case S1SticksOut: goto entryS1SticksOut; + case S1SticksOutBreak: goto entryS1SticksOutBreak; + case S2SticksOut: goto entryS2SticksOut; + case S2SticksOutBreak: goto entryS2SticksOutBreak; + case S1DragsBehind: goto entryS1DragsBehind; + case S1DragsBehindBreak: goto entryS1DragsBehindBreak; + case S2DragsBehind: goto entryS2DragsBehind; + case S2DragsBehindBreak: goto entryS2DragsBehindBreak; + case ExactOverlap: goto entryExactOverlap; + case End: goto entryEnd; + } + +entryBegin: + /* Set up the next structs at the head of the transition lists. */ + s1Tel.set( list1 ); + s2Tel.set( list2 ); + + /* Concurrently scan both out ranges. */ + while ( true ) { + if ( s1Tel.trans.end() ) { + /* We are at the end of state1's ranges. Process the rest of + * state2's ranges. */ + while ( !s2Tel.trans.end() ) { + /* Range is only in s2. */ + CO_RETURN2( ConsumeS2Range, RangeInS2 ); + s2Tel.increment(); + } + break; + } + else if ( s2Tel.trans.end() ) { + /* We are at the end of state2's ranges. Process the rest of + * state1's ranges. */ + while ( !s1Tel.trans.end() ) { + /* Range is only in s1. */ + CO_RETURN2( ConsumeS1Range, RangeInS1 ); + s1Tel.increment(); + } + break; + } + /* Both state1's and state2's transition elements are good. + * The signature of no overlap is a back key being in front of a + * front key, i.e. one range's highKey is less than the other's lowKey. */ + else if ( ctx->keyOps->lt( s1Tel.highKey, s2Tel.lowKey ) ) { + /* A range exists in state1 that does not overlap with state2. 
 */ + CO_RETURN2( OnlyInS1Range, RangeInS1 ); + s1Tel.increment(); + } + else if ( ctx->keyOps->lt( s2Tel.highKey, s1Tel.lowKey ) ) { + /* A range exists in state2 that does not overlap with state1. */ + CO_RETURN2( OnlyInS2Range, RangeInS2 ); + s2Tel.increment(); + } + /* There is overlap, must mix the ranges in some way. */ + else if ( ctx->keyOps->lt( s1Tel.lowKey, s2Tel.lowKey ) ) { + /* Range from state1 sticks out front. Must break it into + * non-overlapping and overlapping segments. The remainder + * (from s2's lowKey up to s1's old highKey) is saved in bottom*. */ + bottomLow = s2Tel.lowKey; + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.lowKey; + ctx->keyOps->decrement( s1Tel.highKey ); + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1SticksOutBreak, BreakS1 ); + + /* Broken off range is only in s1. */ + CO_RETURN2( S1SticksOut, RangeInS1 ); + + /* Advance over the part sticking out front by restoring the saved + * remainder into s1Tel. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + } + else if ( ctx->keyOps->lt( s2Tel.lowKey, s1Tel.lowKey ) ) { + /* Range from state2 sticks out front. Must break it into + * non-overlapping and overlapping segments. The remainder + * (from s1's lowKey up to s2's old highKey) is saved in bottom*. */ + bottomLow = s1Tel.lowKey; + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.lowKey; + ctx->keyOps->decrement( s2Tel.highKey ); + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2SticksOutBreak, BreakS2 ); + + /* Broken off range is only in s2. */ + CO_RETURN2( S2SticksOut, RangeInS2 ); + + /* Advance over the part sticking out front by restoring the saved + * remainder into s2Tel. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + } + /* Low ends are even. Are the high ends even? */ + else if ( ctx->keyOps->lt( s1Tel.highKey, s2Tel.highKey ) ) { + /* Range from state2 goes longer than the range from state1. 
We + * must break the range from state2 into an evenly overlapping + * segment. The remainder starts one past s1's highKey. */ + bottomLow = s1Tel.highKey; + ctx->keyOps->increment( bottomLow ); + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.highKey; + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2DragsBehindBreak, BreakS2 ); + + /* Breaking s2 produces exact overlap. */ + CO_RETURN2( S2DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 2. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + + /* Advance over the entire s1Tel. We have consumed it. */ + s1Tel.increment(); + } + else if ( ctx->keyOps->lt( s2Tel.highKey, s1Tel.highKey ) ) { + /* Range from state1 goes longer than the range from state2. We + * must break the range from state1 into an evenly overlapping + * segment. The remainder starts one past s2's highKey. */ + bottomLow = s2Tel.highKey; + ctx->keyOps->increment( bottomLow ); + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.highKey; + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1DragsBehindBreak, BreakS1 ); + + /* Breaking s1 produces exact overlap. */ + CO_RETURN2( S1DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 1. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + + /* Advance over the entire s2Tel. We have consumed it. */ + s2Tel.increment(); + } + else { + /* There is an exact overlap. */ + CO_RETURN2( ExactOverlap, RangeOverlap ); + + s1Tel.increment(); + s2Tel.increment(); + } + } + + /* Done, go into end state. */ + CO_RETURN( End ); +} + + +/* Compare lists of epsilon transitions. Entries are name ids of targets. */ +typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans; + +/* Compare class for the Approximate minimization. 
 */ +class ApproxCompare +{ +public: + ApproxCompare( FsmCtx *ctx = 0 ) : ctx(ctx) { } + int compare( const StateAp *pState1, const StateAp *pState2 ); + FsmCtx *ctx; +}; + +/* Compare class for the initial partitioning of a partition minimization. */ +class InitPartitionCompare +{ +public: + InitPartitionCompare( FsmCtx *ctx = 0 ) : ctx(ctx) { } + int compare( const StateAp *pState1, const StateAp *pState2 ); + FsmCtx *ctx; +}; + +/* Compare class for the regular partitioning of a partition minimization. */ +class PartitionCompare +{ +public: + PartitionCompare( FsmCtx *ctx = 0 ) : ctx(ctx) { } + int compare( const StateAp *pState1, const StateAp *pState2 ); + FsmCtx *ctx; +}; + +/* Compare class for a minimization that marks pairs. Provides the shouldMark + * routine. Unlike the compare classes above, the constructor has no default + * for ctx. */ +class MarkCompare +{ +public: + MarkCompare( FsmCtx *ctx ) : ctx(ctx) { } + bool shouldMark( MarkIndex &markIndex, const StateAp *pState1, + const StateAp *pState2 ); + FsmCtx *ctx; +}; + +/* List of partitions. */ +typedef DList< MinPartition > PartitionList; + +/* List of transitions out of a state. */ +typedef Vector<TransEl> TransListVect; + +/* Entry point map used for keeping track of entry points in a machine. */ +typedef BstSet< int > EntryIdSet; +typedef BstMapEl< int, StateAp* > EntryMapEl; +typedef BstMap< int, StateAp* > EntryMap; +typedef Vector<EntryMapEl> EntryMapBase; +/* A cost value paired with a name. */ +struct BreadthCost +{ + BreadthCost( std::string name, double cost ) + : name(name), cost(cost) {} + + std::string name; + double cost; +}; +/* A start value plus a vector of named costs. */ +struct BreadthResult +{ + BreadthResult( double start ) : start(start) {} + + double start; + Vector<BreadthCost> costs; +}; + +/* Result of an operation. 
 */ +struct FsmRes +{ + /* Empty tag types: each one selects the constructor for one Type value. */ struct Fsm {}; + struct TooManyStates {}; + struct PriorInteraction {}; + struct CondCostTooHigh {}; + struct InternalError {}; + + enum Type + { + TypeFsm = 1, + TypeTooManyStates, + TypePriorInteraction, + TypeCondCostTooHigh, + TypeInternalError, + }; + /* Result carrying a machine. */ + FsmRes( const Fsm &, FsmAp *fsm ) + : fsm(fsm), type(TypeFsm) {} + /* The remaining constructors build results with a null fsm. */ + FsmRes( const TooManyStates & ) + : fsm(0), type(TypeTooManyStates) {} + + FsmRes( const PriorInteraction &, long long guardId ) + : fsm(0), type(TypePriorInteraction), id(guardId) {} + + FsmRes( const CondCostTooHigh &, long long costId ) + : fsm(0), type(TypeCondCostTooHigh), id(costId) {} + + FsmRes( const InternalError & ) + : fsm(0), type(TypeInternalError) {} + /* True when a non-null machine pointer is held. */ + bool success() + { return fsm != 0; } + /* Both accessors yield the machine only for TypeFsm, otherwise null. */ + operator FsmAp*() + { return type == TypeFsm ? fsm : 0; } + FsmAp *operator->() + { return type == TypeFsm ? fsm : 0; } + + FsmAp *fsm; + Type type; + /* NOTE(review): only the PriorInteraction and CondCostTooHigh constructors + * initialize id; the other three constructors leave it unset. */ long long id; +}; + +/* Graph class that implements actions and priorities. */ +struct FsmAp +{ + /* Constructors/Destructors. */ + FsmAp( FsmCtx *ctx ); + FsmAp( const FsmAp &graph ); + ~FsmAp(); + + FsmCtx *ctx; + + bool priorInteraction; + int guardId; + + /* The list of states. */ + StateList stateList; + StateList misfitList; + NfaStateList nfaList; + StateDict stateDict; + + /* The map of entry points. */ + EntryMap entryPoints; + + /* The start state. */ + StateAp *startState; + + /* Error state, possibly created only when the final machine has been + * created and the XML machine is about to be written. No transitions + * point to this state. */ + StateAp *errState; + + /* The set of final states. */ + StateSet finStateSet; + + /* Misfit Accounting. Are misfits put on a separate list. */ + bool misfitAccounting; + + /* + * Transition actions and priorities. + */ + + /* Set priorities on transitions. 
 */ + void startFsmPrior( int ordering, PriorDesc *prior ); + void allTransPrior( int ordering, PriorDesc *prior ); + void finishFsmPrior( int ordering, PriorDesc *prior ); + void leaveFsmPrior( int ordering, PriorDesc *prior ); + + /* Action setting support. */ + void transferOutActions( StateAp *state ); + void transferErrorActions( StateAp *state, int transferPoint ); + void setErrorActions( StateAp *state, const ActionTable &other ); + void setErrorAction( StateAp *state, int ordering, Action *action ); + + /* Fill all spaces in a transition list with an error transition. */ + void fillGaps( StateAp *state ); + + /* Similar to setErrorAction, instead gives a state to go to on error. */ + void setErrorTarget( StateAp *state, StateAp *target, int *orderings, + Action **actions, int nActs ); + + /* Set actions to execute. */ + void startFsmAction( int ordering, Action *action ); + void allTransAction( int ordering, Action *action ); + void finishFsmAction( int ordering, Action *action ); + void leaveFsmAction( int ordering, Action *action ); + void longMatchAction( int ordering, LongestMatchPart *lmPart ); + + /* Set conditions. */ + CondSpace *addCondSpace( const CondSet &condSet ); + + void convertToCondAp( StateAp *state ); + +private: + /* Can generate states. */ + void doEmbedCondition( StateAp *state, + const CondSet &set, const CondKeySet &vals ); + + +public: + static FsmRes embedCondition( FsmAp *fsm, StateAp *state, const CondSet &set, + const CondKeySet &vals ); + + FsmRes startFsmCondition( Action *condAction, bool sense ); + void allTransCondition( Action *condAction, bool sense ); + void leaveFsmCondition( Action *condAction, bool sense ); + + /* Set error actions to execute. 
 */ + void startErrorAction( int ordering, Action *action, int transferPoint ); + void allErrorAction( int ordering, Action *action, int transferPoint ); + void finalErrorAction( int ordering, Action *action, int transferPoint ); + void notStartErrorAction( int ordering, Action *action, int transferPoint ); + void notFinalErrorAction( int ordering, Action *action, int transferPoint ); + void middleErrorAction( int ordering, Action *action, int transferPoint ); + + /* Set EOF actions. */ + void startEOFAction( int ordering, Action *action ); + void allEOFAction( int ordering, Action *action ); + void finalEOFAction( int ordering, Action *action ); + void notStartEOFAction( int ordering, Action *action ); + void notFinalEOFAction( int ordering, Action *action ); + void middleEOFAction( int ordering, Action *action ); + + /* Set To State actions. */ + void startToStateAction( int ordering, Action *action ); + void allToStateAction( int ordering, Action *action ); + void finalToStateAction( int ordering, Action *action ); + void notStartToStateAction( int ordering, Action *action ); + void notFinalToStateAction( int ordering, Action *action ); + void middleToStateAction( int ordering, Action *action ); + + /* Set From State actions. */ + void startFromStateAction( int ordering, Action *action ); + void allFromStateAction( int ordering, Action *action ); + void finalFromStateAction( int ordering, Action *action ); + void notStartFromStateAction( int ordering, Action *action ); + void notFinalFromStateAction( int ordering, Action *action ); + void middleFromStateAction( int ordering, Action *action ); + + /* Shift the action ordering of the start transitions to start at + * fromOrder and increase in units of 1. Useful before kleene star + * operation. */ + int shiftStartActionOrder( int fromOrder ); + + /* Clear all priorities from the fsm so they won't affect minimization + * of the final fsm. */ + void clearAllPriorities(); + + /* Zero out all the function keys. 
 */ + void nullActionKeys(); + + /* Walk the list of states and verify state properties. */ + void verifyStates(); + + /* Misfit Accounting. Are misfits put on a separate list. */ + void setMisfitAccounting( bool val ) + { misfitAccounting = val; } + + /* Set and Unset a state as final. */ + void setFinState( StateAp *state ); + void unsetFinState( StateAp *state ); + + void setStartState( StateAp *state ); + void unsetStartState( ); + + /* Set and unset a state as an entry point. */ + void setEntry( int id, StateAp *state ); + void changeEntry( int id, StateAp *to, StateAp *from ); + void unsetEntry( int id, StateAp *state ); + void unsetEntry( int id ); + void unsetAllEntryPoints(); + + /* Epsilon transitions. */ + void epsilonTrans( int id ); + + void checkEpsilonRegularInteraction( const PriorTable &t1, const PriorTable &t2 ); + +private: + /* Can generate states. */ + void shadowReadWriteStates(); + + void afterOpMinimize( bool lastInSeq = true ); + + void removeDups( ActionTable &table ); + +public: + + void removeActionDups(); + + /* + * Basic attaching and detaching. + */ + + /* Common to attaching/detaching list and default. */ + template < class Head > void attachToInList( StateAp *from, + StateAp *to, Head *&head, Head *trans ); + template < class Head > void detachFromInList( StateAp *from, + StateAp *to, Head *&head, Head *trans ); + + void attachToNfa( StateAp *from, StateAp *to, NfaTrans *nfaTrans ); + void detachFromNfa( StateAp *from, StateAp *to, NfaTrans *nfaTrans ); + + void attachStateDict( StateAp *from, StateAp *to ); + void detachStateDict( StateAp *from, StateAp *to ); + + /* Attach with a new transition. */ + CondAp *attachNewCond( TransAp *trans, StateAp *from, + StateAp *to, CondKey onChar ); + TransAp *attachNewTrans( StateAp *from, StateAp *to, + Key onChar1, Key onChar2 ); + + /* Attach with an existing transition that already in an out list. 
 */ + void attachTrans( StateAp *from, StateAp *to, TransDataAp *trans ); + void attachTrans( StateAp *from, StateAp *to, CondAp *trans ); + + /* Redirect a transition away from error and towards some state. */ + void redirectErrorTrans( StateAp *from, StateAp *to, TransDataAp *trans ); + void redirectErrorTrans( StateAp *from, StateAp *to, CondAp *trans ); + + /* Detach a transition from a target state. */ + void detachTrans( StateAp *from, StateAp *to, TransDataAp *trans ); + void detachTrans( StateAp *from, StateAp *to, CondAp *trans ); + + /* Detach a state from the graph. */ + void detachState( StateAp *state ); + + /* + * NFA to DFA conversion routines. + */ + + /* Duplicate a transition that will dropin to a free spot. */ + TransDataAp *dupTransData( StateAp *from, TransDataAp *srcTrans ); + TransAp *dupTrans( StateAp *from, TransAp *srcTrans ); + CondAp *dupCondTrans( StateAp *from, TransAp *destParent, CondAp *srcTrans ); + +private: + /* In crossing, two transitions both go to real states. Can generate + * states. */ + template< class Trans > Trans *fsmAttachStates( + StateAp *from, Trans *destTrans, Trans *srcTrans ); + +public: + void expandConds( StateAp *fromState, TransAp *trans, + CondSpace *fromSpace, CondSpace *mergedSpace ); + TransAp *copyTransForExpansion( StateAp *fromState, TransAp *srcTrans ); + StateAp *copyStateForExpansion( StateAp *srcState ); + void freeEffectiveTrans( TransAp *srcTrans ); + +private: + /* Two transitions are to be crossed, handle the possibility of either + * going to the error state. Can generate states. */ + template< class Trans > Trans *mergeTrans( StateAp *from, + Trans *destTrans, Trans *srcTrans ); + +public: + /* Compare to determine the relative priorities of two transition tables. 
 */ + int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ); + + void addOutCondition( StateAp *state, Action *condAction, bool sense ); + + void expandCondKeys( CondKeySet &condKeys, CondSpace *fromSpace, + CondSpace *mergedSpace ); + + /* Back to trans ap (minimization) */ + TransDataAp *convertToTransAp( StateAp *from, CondAp *cond ); + + /* Cross a src transition with one that is already occupying a spot. */ + TransCondAp *convertToCondAp( StateAp *state, TransDataAp *trans ); + CondSpace *expandCondSpace( TransAp *destTrans, TransAp *srcTrans ); + +private: + /* Can generate states. */ + TransAp *crossTransitions( StateAp *from, + TransAp *destTrans, TransAp *srcTrans ); + TransDataAp *crossTransitionsBothPlain( StateAp *from, + TransDataAp *destTrans, TransDataAp *srcTrans ); + CondAp *crossCondTransitions( StateAp *from, + TransAp *destParent, CondAp *destTrans, CondAp *srcTrans ); + +public: + void prepareNfaRound(); + void finalizeNfaRound(); + + void outTransCopy( StateAp *dest, TransAp *srcList ); + void nfaMergeStates( StateAp *destState, StateAp **srcStates, int numSrc ); + void mergeOutConds( StateAp *destState, StateAp *srcState, bool leaving = false ); + void checkPriorInteractions( StateAp *destState, StateAp *srcState ); + void mergeNfaTransitions( StateAp *destState, StateAp *srcState ); + void mergeStateProperties( StateAp *destState, StateAp *srcState ); + void mergeStatesLeaving( StateAp *destState, StateAp *srcState ); + void mergeStateBits( StateAp *destState, StateAp *srcState ); + void mergeStates( StateAp *destState, StateAp *srcState, bool leaving = false ); + + /* Merge a set of states into destState. */ + void mergeStateList( StateAp *destState, StateAp **srcStates, int numSrc ); + + /* Make all states that are combinations of other states and that + * have not yet had their out transitions filled in. This will + * empty out stateDict and stFil. 
 */ + void cleanAbortedFill( StateAp *state ); + void cleanAbortedFill(); + bool overStateLimit(); + void nfaFillInStates(); + + /* + * Transition Comparison. + */ + + template< class Trans > int compareCondBitElim( Trans *trans1, Trans *trans2 ); + template< class Trans > int compareCondBitElimPtr( Trans *trans1, Trans *trans2 ); + int compareCondListBitElim( const CondList &condList1, const CondList &condList2 ); + + /* Compare priority and function table of transitions. */ + static int compareTransData( TransAp *trans1, TransAp *trans2 ); + template< class Trans > static int compareCondData( Trans *trans1, Trans *trans2 ); + + /* Compare transition data. Either of the pointers may be null. */ + static int compareTransDataPtr( TransAp *trans1, TransAp *trans2 ); + template< class Trans > static int compareCondDataPtr( Trans *trans1, Trans *trans2 ); + + /* Compare target state and transition data. Either pointer may be null. */ + static int compareFullPtr( TransAp *trans1, TransAp *trans2 ); + + /* Compare target partitions. Either pointer may be null. */ + static int compareTransPartPtr( TransAp *trans1, TransAp *trans2 ); + template< class Trans > static int compareCondPartPtr( Trans *trans1, Trans *trans2 ); + + static int comparePart( TransAp *trans1, TransAp *trans2 ); + + /* Check marked status of target states. Either pointer may be null. */ + static bool shouldMarkPtr( MarkIndex &markIndex, + TransAp *trans1, TransAp *trans2 ); + + /* + * Callbacks. + */ + + /* Add in the properties of srcTrans into this. */ + template< class Trans > void addInTrans( Trans *destTrans, Trans *srcTrans ); + + /* Compare states on data stored in the states. */ + static int compareStateData( const StateAp *state1, const StateAp *state2 ); + + /* Out transition data. */ + void clearOutData( StateAp *state ); + bool hasOutData( StateAp *state ); + void transferOutData( StateAp *destState, StateAp *srcState ); + + /* + * Allocation. 
+ */ + + /* New up a state and add it to the graph. */ + StateAp *addState(); + + /* + * Building basic machines + */ + + static FsmAp *concatFsm( FsmCtx *ctx, Key c ); + static FsmAp *concatFsmCI( FsmCtx *ctx, Key c ); + static FsmAp *concatFsm( FsmCtx *ctx, Key *str, int len ); + static FsmAp *concatFsmCI( FsmCtx *ctx, Key *str, int len ); + static FsmAp *orFsm( FsmCtx *ctx, Key *set, int len ); + static FsmAp *rangeFsm( FsmCtx *ctx, Key low, Key high ); + static FsmAp *rangeFsmCI( FsmCtx *ctx, Key low, Key high ); + static FsmAp *rangeStarFsm( FsmCtx *ctx, Key low, Key high ); + static FsmAp *emptyFsm( FsmCtx *ctx ); + static FsmAp *lambdaFsm( FsmCtx *ctx ); + static FsmAp *dotFsm( FsmCtx *ctx ); + static FsmAp *dotStarFsm( FsmCtx *ctx ); + static FsmAp *notRangeFsm( FsmCtx *ctx, Key low, Key high ); + + /* + * Fsm operators. + */ + + static FsmRes starOp( FsmAp *fsm ); + static FsmRes plusOp( FsmAp *fsm ); + static FsmRes questionOp( FsmAp *fsm ); + + static FsmRes exactRepeatOp( FsmAp *fsm, int times ); + static FsmRes maxRepeatOp( FsmAp *fsm, int times ); + static FsmRes minRepeatOp( FsmAp *fsm, int times ); + static FsmRes rangeRepeatOp( FsmAp *fsm, int lower, int upper ); + + static FsmRes concatOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true, + StateSet *fromStates = 0, bool optional = false ); + static FsmRes unionOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true ); + static FsmRes intersectOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true ); + static FsmRes subtractOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true ); + static FsmRes epsilonOp( FsmAp *fsm ); + static FsmRes joinOp( FsmAp *fsm, int startId, int finalId, FsmAp **others, int numOthers ); + + static FsmRes rightStartConcatOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true ); + + void transferOutToNfaTrans( NfaTrans *trans, StateAp *state ); + + enum NfaRepeatMode { + NfaLegacy = 1, + NfaGreedy, + NfaLazy + }; + + static FsmRes applyNfaTrans( FsmAp *fsm, StateAp *fromState, StateAp 
*toState, NfaTrans *nfaTrans ); + + /* Results in an NFA. */ + static FsmRes nfaUnionOp( FsmAp *fsm, FsmAp **others, int n, int depth, std::ostream &stats ); + static FsmRes nfaRepeatOp( FsmAp *fsm, Action *push, Action *pop, Action *init, + Action *stay, Action *repeat, Action *exit ); + + static FsmRes nfaRepeatOp2( FsmAp *fsm, Action *push, Action *pop, Action *init, + Action *stay, Action *repeat, Action *exit, NfaRepeatMode mode = NfaGreedy ); + static FsmRes nfaWrap( FsmAp *fsm, Action *push, Action *pop, Action *init, + Action *stay, Action *exit, NfaRepeatMode mode = NfaGreedy ); + + static FsmRes nfaUnion( const NfaRoundVect &roundsList, FsmAp **machines, + int numMachines, std::ostream &stats, bool printStatistics ); + + static FsmRes condPlus( FsmAp *fsm, long repId, Action *ini, Action *inc, Action *min, Action *max ); + static FsmRes condStar( FsmAp *fsm, long repId, Action *ini, Action *inc, Action *min, Action *max ); + + /* Make a new start state that has no entry points. Will not change the + * meaning of the fsm. */ + static FsmRes isolateStartState( FsmAp *fsm ); + + /* + * Analysis Functions + */ + static FsmRes condCostFromState( FsmAp *fsm, StateAp *state, long depth ); + static FsmRes condCostSearch( FsmAp *fsm ); + static void breadthFromEntry( double &total, int &minDepth, double *histogram, FsmAp *fsm, StateAp *state ); + static void breadthFromState( double &total, int &minDepth, double *histogram, FsmAp *fsm, StateAp *state, + long depth, int maxDepth, double stateScore); + + /* + * Operator workers + */ + void globOp( FsmAp **others, int numOthers ); + void deterministicEntry(); + + /* Determine if there are any entry points into a start state other than + * the start state. */ + bool isStartStateIsolated(); + + /* Make a new start state that has no entry points. Will not change the + * meaning of the fsm. */ + StateAp *dupStartState(); + + /* Workers for resolving epsilon transitions. 
 */ + bool inEptVect( EptVect *eptVect, StateAp *targ ); + void epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving ); + void resolveEpsilonTrans(); + + static bool fillAbort( FsmRes &res, FsmAp *fsm ); + + static FsmRes fillInStates( FsmAp *fsm ); + + /* Workers for concatenation and union. */ + static FsmRes doUnion( FsmAp *fsm, FsmAp *other ); + static FsmRes doConcat( FsmAp *fsm, FsmAp *other, StateSet *fromStates, bool optional ); + + static void condCost( Action *action, long repId ); + static void applyEntryPriorGuard( FsmAp *fsm, long repId ); + static void applyRepeatPriorGuard( FsmAp *fsm, long repId ); + + /* + * Final states + */ + + /* Unset any final states that are no longer to be final + * due to final bits. */ + void unsetIncompleteFinals(); + void unsetKilledFinals(); + + /* Bring in other's entry points. Assumes others states are going to be + * copied into this machine. */ + void copyInEntryPoints( FsmAp *other ); + + /* Ordering states. */ + void depthFirstOrdering( StateAp *state ); + void depthFirstOrdering(); + void sortStatesByFinal(); + + /* Set sequential state numbers starting at 0. */ + void setStateNumbers( int base ); + + /* Unset all final states. */ + void unsetAllFinStates(); + + /* Set the bits of final states and clear the bits of non final states. */ + void setFinBits( int finStateBits ); + void unsetFinBits( int finStateBits ); + + /* + * Self-consistency checks. + */ + + /* Run a sanity check on the machine. */ + void verifyIntegrity(); + + /* Verify that there are no unreachable states, or dead end states. */ + void verifyReachability(); + void verifyNoDeadEndStates(); + + /* + * Path pruning + */ + + /* Mark all states reachable from state. */ + void markReachableFromHereReverse( StateAp *state ); + + /* Mark all states reachable from state. */ + void markReachableFromHere( StateAp *state ); + void markReachableFromHereStopFinal( StateAp *state ); + + /* Any transitions to another state? 
 */ + bool anyRegularTransitions( StateAp *state ); + + /* Removes states that cannot be reached by any path in the fsm and are + * thus wasted silicon. NOTE(review): this wording is identical to the + * comment on removeUnreachableStates() below; it was presumably meant to + * describe dead end states instead -- confirm against the definition. */ + void removeDeadEndStates(); + + /* Removes states that cannot be reached by any path in the fsm and are + * thus wasted silicon. */ + long removeUnreachableStates(); + + /* Remove error actions from states on which the error transition will + * never be taken. */ + bool outListCovers( StateAp *state ); + bool anyErrorRange( StateAp *state ); + + /* Remove states that are on the misfit list. */ + void removeMisfits(); + + /* + * FSM Minimization + */ + + /* Minimization by partitioning. */ + void minimizePartition1(); + void minimizePartition2(); + + /* Minimize the final state Machine. The result is the minimal fsm. Slow + * but stable, correct minimization. Uses n^2 space (lookout) and average + * n^2 time. Worst case n^3 time, but that is a very rare case. */ + void minimizeStable(); + + /* Minimize the final state machine. Does not find the minimal fsm, but a + * pretty good approximation. Does not use any extra space. Average n^2 + * time. Worst case n^3 time, but that is a very rare case. */ + void minimizeApproximate(); + + /* This is the worker for the minimize approximate solution. It merges + * states that have identical out transitions. */ + bool minimizeRound( ); + + /* Given an initial partitioning of states, split partitions that have out trans + * to differing partitions. */ + int partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts ); + + /* Split partitions that have a transition to a previously split partition, until + * there are no more partitions to split. */ + int splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts ); + + /* Fuse together states in the same partition. 
 */ + void fusePartitions( MinPartition *parts, int numParts ); + + /* Mark pairs where out final stateness differs, out trans data differs, + * trans pairs go to a marked pair or trans data differs. Should get + * a lot of pairs. */ + void initialMarkRound( MarkIndex &markIndex ); + + /* One marking round on all state pairs. Considers if trans pairs go + * to a marked state only. Returns whether or not a pair was marked. */ + bool markRound( MarkIndex &markIndex ); + + /* Move the in trans into src into dest. */ + void moveInwardTrans(StateAp *dest, StateAp *src); + + /* Make state src and dest the same state. */ + void fuseEquivStates( StateAp *dest, StateAp *src ); + + /* Find any states that didn't get marked by the marking algorithm and + * merge them into the primary states of their equivalence class. */ + void fuseUnmarkedPairs( MarkIndex &markIndex ); + + /* Merge neighboring transitions that go to the same state and have the + * same transition data. */ + void compressTransitions(); + + /* Returns true if there is a transition (either explicit or by a gap) to + * the error state. */ + bool checkErrTrans( StateAp *state, TransAp *trans ); + bool checkErrTrans( StateAp *state, CondAp *trans ); + bool checkErrTransFinish( StateAp *state ); + bool hasErrorTrans(); + + /* Check if a machine defines a single character. This is useful in + * validating ranges and machines to export. */ + bool checkSingleCharMachine( ); + + bool elimCondBits(); +}; + +/* Callback invoked when another trans (or possibly this) is added into this + * transition during the merging process. Draw in any properties of srcTrans + * into this transition. AddInTrans is called when a new transitions is made + * that will be a duplicate of another transition or a combination of several + * other transitions. AddInTrans will be called for each transition that the + * new transition is to represent. 
*/ +template< class Trans > void FsmAp::addInTrans( Trans *destTrans, Trans *srcTrans ) +{ + /* Protect against adding in from ourselves. */ + if ( srcTrans == destTrans ) { + /* Adding in ourselves, need to make a copy of the source transitions. + * The priorities are not copied in as that would have no effect. */ + destTrans->lmActionTable.setActions( LmActionTable(srcTrans->lmActionTable) ); + destTrans->actionTable.setActions( ActionTable(srcTrans->actionTable) ); + } + else { + /* Not a copy of ourself, get the functions and priorities. */ + destTrans->lmActionTable.setActions( srcTrans->lmActionTable ); + destTrans->actionTable.setActions( srcTrans->actionTable ); + destTrans->priorTable.setPriors( srcTrans->priorTable ); + } +} + +/* Compares two transition pointers according to priority and functions. + * Either pointer may be null. Does not consider to state or from state. */ +template< class Trans > int FsmAp::compareCondDataPtr( Trans *trans1, Trans *trans2 ) +{ + if ( trans1 == 0 && trans2 != 0 ) + return -1; + else if ( trans1 != 0 && trans2 == 0 ) + return 1; + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. */ + int compareRes = compareCondData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + return 0; +} + +/* Compares two transition pointers according to priority and functions. + * Either pointer may be null. Does not consider to state or from state. */ +template< class Trans > int FsmAp::compareCondBitElimPtr( Trans *trans1, Trans *trans2 ) +{ + if ( trans1 == 0 && trans2 != 0 ) + return -1; + else if ( trans1 != 0 && trans2 == 0 ) + return 1; + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. 
*/ + int compareRes = compareCondBitElim( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + return 0; +} + +#endif diff --git a/ragel/fsmmin.cc b/ragel/fsmmin.cc new file mode 100644 index 00000000..cabe3968 --- /dev/null +++ b/ragel/fsmmin.cc @@ -0,0 +1,934 @@ +/* + * Copyright 2002-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "fsmgraph.h" +#include "mergesort.h" + +struct MergeSortInitPartition + : public MergeSort<StateAp*, InitPartitionCompare> +{ + MergeSortInitPartition( FsmCtx *ctx ) + { + InitPartitionCompare::ctx = ctx; + } +}; + +struct MergeSortPartition + : public MergeSort<StateAp*, PartitionCompare> +{ + MergeSortPartition( FsmCtx *ctx ) + { + PartitionCompare::ctx = ctx; + } +}; + +struct MergeSortApprox + : public MergeSort<StateAp*, ApproxCompare> +{ + MergeSortApprox( FsmCtx *ctx ) + { + ApproxCompare::ctx = ctx; + } +}; + +int FsmAp::partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort object and a partition compare, both bound to ctx. */ + MergeSortPartition mergeSort( ctx ); + PartitionCompare partCompare( ctx ); + + /* For each partition. */ + for ( int p = 0; p < numParts; p++ ) { + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = parts[p].list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. */ + int numStates = parts[p].list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + int destPart = p, firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next avail spot. */ + destPart = numParts; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != p ) { + StateAp *state = parts[p].list.detach( statePtrs[s] ); + parts[destPart].list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transferred so the + * result of the sort is not altered. 
*/ + for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + } + + return numParts; +} + +/** + * \brief Minimize by partitioning version 1. + * + * Repeatedly tries to split partitions until all partitions are unsplittable. + * Produces the most minimal FSM possible. + */ +void FsmAp::minimizePartition1() +{ + /* Need one mergesort object and partition compares. */ + MergeSortInitPartition mergeSort( ctx ); + InitPartitionCompare initPartCompare( ctx ); + + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. This gives us an initial partitioning to work + * with. + */ + + /* Make a array of pointers to states. */ + int numStates = stateList.length(); + StateAp** statePtrs = new StateAp*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. */ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. 
*/ + stateList.abandon(); + + /* Split partitions. */ + int numParts = destPart + 1; + while ( true ) { + /* Test all partitions for splitting. */ + int newNum = partitionRound( statePtrs, parts, numParts ); + + /* When no partitions can be split, stop. */ + if ( newNum == numParts ) + break; + + numParts = newNum; + } + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + +/* Split partitions that need splittting, decide which partitions might need + * to be split as a result, continue until there are no more that might need + * to be split. */ +int FsmAp::splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort and a partition compare. */ + MergeSortPartition mergeSort( ctx ); + PartitionCompare partCompare( ctx ); + + /* The lists of unsplitable (partList) and splitable partitions. + * Only partitions in the splitable list are check for needing splitting. */ + PartitionList partList, splittable; + + /* Initially, all partitions are born from a split (the initial + * partitioning) and can cause other partitions to be split. So any + * partition with a state with a transition out to another partition is a + * candidate for splitting. This will make every partition except possibly + * partitions of final states split candidates. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume not active. */ + parts[p].active = false; + + /* Look for a trans out of any state in the partition. */ + for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) { + /* If there is at least one transition out to another state then + * the partition becomes splittable. */ + if ( state->outList.length() > 0 ) { + parts[p].active = true; + break; + } + } + + /* If it was found active then it goes on the splittable list. 
*/ + if ( parts[p].active ) + splittable.append( &parts[p] ); + else + partList.append( &parts[p] ); + } + + /* While there are partitions that are splittable, pull one off and try + * to split it. If it splits, determine which partitions may now be split + * as a result of the newly split partition. */ + while ( splittable.length() > 0 ) { + MinPartition *partition = splittable.detachFirst(); + + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = partition->list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. */ + int numStates = partition->list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + MinPartition *destPart = partition; + int firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next avail spot. */ + destPart = &parts[numParts]; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != partition ) { + StateAp *state = partition->list.detach( statePtrs[s] ); + destPart->list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transfered so the + * result of the sort is not altered. */ + int newPart; + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + + /* Put the partition we just split and any new partitions that came out + * of the split onto the inactive list. 
*/ + partition->active = false; + partList.append( partition ); + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + parts[newPart].active = false; + partList.append( &parts[newPart] ); + } + + if ( destPart == partition ) + continue; + + /* Now determine which partitions are splittable as a result of + * splitting partition by walking the in lists of the states in + * partitions that got split. Partition is the faked first item in the + * loop. */ + MinPartition *causalPart = partition; + newPart = firstNewPart - 1; + while ( newPart < numParts ) { + /* Loop all states in the causal partition. */ + StateList::Iter state = causalPart->list; + for ( ; state.lte(); state++ ) { + /* Walk all transition into the state and put the partition + * that the from state is in onto the splittable list. */ + for ( TransInList::Iter t = state->inTrans; t.lte(); t++ ) { + MinPartition *fromPart = t->fromState->alg.partition; + if ( ! fromPart->active ) { + fromPart->active = true; + partList.detach( fromPart ); + splittable.append( fromPart ); + } + } + for ( CondInList::Iter t = state->inCond; t.lte(); t++ ) { + MinPartition *fromPart = t->fromState->alg.partition; + if ( ! fromPart->active ) { + fromPart->active = true; + partList.detach( fromPart ); + splittable.append( fromPart ); + } + } + } + + newPart += 1; + causalPart = &parts[newPart]; + } + } + return numParts; +} + + +/** + * \brief Minimize by partitioning version 2 (best alg). + * + * Repeatedly tries to split partitions that may splittable until there are no + * more partitions that might possibly need splitting. Runs faster than + * version 1. Produces the most minimal fsm possible. + */ +void FsmAp::minimizePartition2() +{ + /* Need a mergesort and an initial partition compare. */ + MergeSortInitPartition mergeSort( ctx ); + InitPartitionCompare initPartCompare( ctx ); + + /* Nothing to do if there are no states. 
*/ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. This gives us an initial partitioning to work + * with. + */ + + /* Make a array of pointers to states. */ + int numStates = stateList.length(); + StateAp** statePtrs = new StateAp*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. */ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. */ + stateList.abandon(); + + /* Split partitions. */ + int numParts = splitCandidates( statePtrs, parts, destPart+1 ); + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + +void FsmAp::initialMarkRound( MarkIndex &markIndex ) +{ + /* P and q for walking pairs. */ + StateAp *p = stateList.head, *q; + + /* Need an initial partition compare. */ + InitPartitionCompare initPartCompare( ctx ); + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. 
This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* If the states differ on final state status, out transitions or + * any transition data then they should be separated on the initial + * round. */ + if ( initPartCompare.compare( p, q ) != 0 ) + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + + q = q->next; + } + p = p->next; + } +} + +#ifdef TO_UPGRADE_CONDS +bool FsmAp::markRound( MarkIndex &markIndex ) +{ + /* P an q for walking pairs. Take note if any pair gets marked. */ + StateAp *p = stateList.head, *q; + bool pairWasMarked = false; + + /* Need a mark comparison. */ + MarkCompare markCompare( ctx ); + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* Should we mark the pair? */ + if ( !markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + if ( markCompare.shouldMark( markIndex, p, q ) ) { + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + pairWasMarked = true; + } + } + q = q->next; + } + p = p->next; + } + + return pairWasMarked; +} +#endif + +#ifdef TO_UPGRADE_CONDS +/** + * \brief Minimize by pair marking. + * + * Decides if each pair of states is distinct or not. Uses O(n^2) memory and + * should only be used on small graphs. Produces the most minmimal FSM + * possible. + */ +void FsmAp::minimizeStable() +{ + /* Set the state numbers. */ + setStateNumbers( 0 ); + + /* This keeps track of which pairs have been marked. */ + MarkIndex markIndex( stateList.length() ); + + /* Mark pairs where final stateness, out trans, or trans data differ. */ + initialMarkRound( markIndex ); + + /* While the last round of marking succeeded in marking a state + * continue to do another round. 
*/ + int modified = markRound( markIndex ); + while (modified) + modified = markRound( markIndex ); + + /* Merge pairs that are unmarked. */ + fuseUnmarkedPairs( markIndex ); +} +#endif + +#ifdef TO_UPGRADE_CONDS +bool FsmAp::minimizeRound() +{ + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return false; + + /* Need a mergesort on approx compare and an approx compare. */ + MergeSortApprox mergeSort( ctx ); + ApproxCompare approxCompare( ctx ); + + /* Fill up an array of pointers to the states. */ + StateAp **statePtrs = new StateAp*[stateList.length()]; + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + bool modified = false; + + /* Sort The list. */ + mergeSort.sort( statePtrs, stateList.length() ); + + /* Walk the list looking for duplicates next to each other, + * merge in any duplicates. */ + StateAp **pLast = statePtrs; + StateAp **pState = statePtrs + 1; + for ( int i = 1; i < stateList.length(); i++, pState++ ) { + if ( approxCompare.compare( *pLast, *pState ) == 0 ) { + /* Last and pState are the same, so fuse together. Move forward + * with pState but not with pLast. If any more are identical, we + * must */ + fuseEquivStates( *pLast, *pState ); + modified = true; + } + else { + /* Last and this are different, do not set to merge them. Move + * pLast to the current (it may be way behind from merging many + * states) and pState forward one to consider the next pair. */ + pLast = pState; + } + } + delete[] statePtrs; + return modified; +} +#endif + +#ifdef TO_UPGRADE_CONDS +/** + * \brief Minmimize by an approximation. + * + * Repeatedly tries to find states with transitions out to the same set of + * states on the same set of keys until no more identical states can be found. + * Does not produce the most minimial FSM possible. 
+ */ +void FsmAp::minimizeApproximate() +{ + /* While the last minimization round succeeded in compacting states, + * continue to try to compact states. */ + while ( true ) { + bool modified = minimizeRound(); + if ( ! modified ) + break; + } +} +#endif + + +/* Remove states that have no path to them from the start state. Recursively + * traverses the graph marking states that have paths into them. Then removes + * all states that did not get marked. */ +long FsmAp::removeUnreachableStates() +{ + long origLen = stateList.length(); + + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all the states that can be reached + * through the existing set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + StateAp *state = stateList.head; + while ( state ) { + StateAp *next = state->next; + + if ( state->stateBits & STB_ISMARKED ) + state->stateBits &= ~ STB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } + + return origLen - stateList.length(); +} + +bool FsmAp::outListCovers( StateAp *state ) +{ + /* Must be at least one range to cover. */ + if ( state->outList.length() == 0 ) + return false; + + /* The first must start at the lower bound. */ + TransList::Iter trans = state->outList.first(); + if ( ctx->keyOps->lt( ctx->keyOps->minKey, trans->lowKey ) ) + return false; + + /* Loop starts at second el. */ + trans.increment(); + + /* Loop checks lower against prev upper. */ + for ( ; trans.lte(); trans++ ) { + /* Lower end of the trans must be one greater than the + * previous' high end. 
*/ + Key lowKey = trans->lowKey; + ctx->keyOps->decrement( lowKey ); + if ( ctx->keyOps->lt( trans->prev->highKey, lowKey ) ) + return false; + } + + /* Require that the last range extends to the upper bound. */ + trans = state->outList.last(); + if ( ctx->keyOps->lt( trans->highKey, ctx->keyOps->maxKey ) ) + return false; + + return true; +} + +/* Remove states that that do not lead to a final states. Works recursivly traversing + * the graph in reverse (starting from all final states) and marking seen states. Then + * removes states that did not get marked. */ +void FsmAp::removeDeadEndStates() +{ + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all states that have paths to the final states. */ + StateAp **st = finStateSet.data; + int nst = finStateSet.length(); + for ( int i = 0; i < nst; i++, st++ ) + markReachableFromHereReverse( *st ); + + /* Start state gets honorary marking. If the machine accepts nothing we + * still want the start state to hang around. This must be done after the + * recursive call on all the final states so that it does not cause the + * start state in transitions to be skipped when the start state is + * visited by the traversal. */ + startState->stateBits |= STB_ISMARKED; + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + StateAp *state = stateList.head; + while ( state != 0 ) { + StateAp *next = state->next; + + if ( state->stateBits & STB_ISMARKED ) + state->stateBits &= ~ STB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} + +/* Remove states on the misfit list. To work properly misfit accounting should + * be on when this is called. The detaching of a state will likely cause + * another misfit to be collected and it can then be removed. 
*/ +void FsmAp::removeMisfits() +{ + while ( misfitList.length() > 0 ) { + /* Get the first state. */ + StateAp *state = misfitList.head; + + /* Detach and delete. */ + detachState( state ); + + /* The state was previously on the misfit list and detaching can only + * remove in transitions so the state must still be on the misfit + * list. */ + misfitList.detach( state ); + delete state; + } +} + +/* Fuse src into dest because they have been deemed equivalent states. + * Involves moving transitions into src to go into dest and invoking + * callbacks. Src is deleted detached from the graph and deleted. */ +void FsmAp::fuseEquivStates( StateAp *dest, StateAp *src ) +{ + /* This would get ugly. */ + assert( dest != src ); + + /* Cur is a duplicate. We can merge it with trail. */ + moveInwardTrans( dest, src ); + + detachState( src ); + stateList.detach( src ); + delete src; +} + +void FsmAp::fuseUnmarkedPairs( MarkIndex &markIndex ) +{ + StateAp *p = stateList.head, *nextP, *q; + + /* Definition: The primary state of an equivalence class is the first state + * encounterd that belongs to the equivalence class. All equivalence + * classes have primary state including equivalence classes with one state + * in it. */ + + /* For each unmarked pair merge p into q and delete p. q is always the + * primary state of it's equivalence class. We wouldn't have landed on it + * here if it were not, because it would have been deleted. + * + * Proof that q is the primaray state of it's equivalence class: Assume q + * is not the primary state of it's equivalence class, then it would be + * merged into some state that came before it and thus p would be + * equivalent to that state. But q is the first state that p is equivalent + * to so we have a contradiction. */ + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. 
*/ + while ( p != 0 ) { + nextP = p->next; + + q = stateList.head; + while ( q != p ) { + /* An unmarked pair is equivalent: fuse p into q and stop scanning. */ + if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + fuseEquivStates( q, p ); + break; + } + q = q->next; + } + p = nextP; + } +} + +void FsmAp::fusePartitions( MinPartition *parts, int numParts ) +{ + /* For each partition, fuse state 2, 3, ... into state 1. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume that there will always be at least one state. */ + StateAp *first = parts[p].list.head, *toFuse = first->next; + + /* Put the first state back onto the main state list. Don't bother + * removing it from the partition list first. */ + stateList.append( first ); + + /* Fuse the rest of the states into the first. */ + while ( toFuse != 0 ) { + /* Save the next. We will trash it before it is needed. */ + StateAp *next = toFuse->next; + + /* Put the state to be fused in to the first back onto the main + * list before it is fused. The state needs to be on + * the main list for the detach from the graph to work. Don't + * bother removing the state from the partition list first. We + * need not maintain it. */ + stateList.append( toFuse ); + + /* Now fuse to the first. */ + fuseEquivStates( first, toFuse ); + + /* Go to the next that we saved before trashing the next pointer. */ + toFuse = next; + } + + /* We transferred the states from the partition list into the main list without + * removing the states from the partition list first. Clean it up. */ + parts[p].list.abandon(); + } +} + +/* Merge neighboring transitions that go to the same state and have the same + * transitions data. 
*/ +void FsmAp::compressTransitions() +{ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outList.length() > 1 ) { + for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) { + Key nextLow = next->lowKey; + ctx->keyOps->decrement( nextLow ); + + /* Require there be no conditions in either of the merge + * candidates. */ + bool merge = false; + TransDataAp *td; + TransDataAp *tn; + + if ( trans->plain() && + next->plain() && + ctx->keyOps->eq( trans->highKey, nextLow ) ) + { + td = trans->tdap(); + tn = next->tdap(); + + /* Check the condition target and action data. */ + if ( td->toState == tn->toState && CmpActionTable::compare( + td->actionTable, tn->actionTable ) == 0 ) + { + merge = true; + } + } + + if ( merge ) { + trans->highKey = next->highKey; + st->outList.detach( tn ); + detachTrans( tn->fromState, tn->toState, tn ); + delete tn; + next = trans.next(); + } + else { + trans.increment(); + next.increment(); + } + } + } + } +} + +bool FsmAp::elimCondBits() +{ + bool modified = false; + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + restart: + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( !trans->plain() ) { + CondSpace *cs = trans->condSpace; + + for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) { + long bit = 1 << csi.pos(); + + /* Sort into on and off lists. */ + CondList on; + CondList off; + TransCondAp *tcap = trans->tcap(); + while ( tcap->condList.length() > 0 ) { + CondAp *cond = tcap->condList.detachFirst(); + if ( cond->key.getVal() & bit ) { + cond->key = CondKey( cond->key.getVal() & ~bit ); + on.append( cond ); + } + else { + off.append( cond ); + } + } + + bool merge = false; + if ( on.length() > 0 && on.length() == off.length() ) { + /* test if the same */ + int cmpRes = compareCondListBitElim( on, off ); + if ( cmpRes == 0 ) + merge = true; + } + + if ( merge ) { + if ( cs->condSet.length() == 1 ) { + /* clear out the on-list. 
*/ + while ( on.length() > 0 ) { + CondAp *cond = on.detachFirst(); + detachTrans( st, cond->toState, cond ); + } + + /* turn back into a plain transition. */ + CondAp *cond = off.detachFirst(); + TransAp *n = convertToTransAp( st, cond ); + TransAp *before = trans->prev; + st->outList.detach( trans ); + st->outList.addAfter( before, n ); + modified = true; + goto restart; + } + else + { + CondSet newSet = cs->condSet; + newSet.Vector<Action*>::remove( csi.pos(), 1 ); + trans->condSpace = addCondSpace( newSet ); + + /* clear out the on-list. */ + while ( on.length() > 0 ) { + CondAp *cond = on.detachFirst(); + detachTrans( st, cond->toState, cond ); + } + } + } + + /* Turn back into a single list. */ + while ( on.length() > 0 || off.length() > 0 ) { + if ( on.length() == 0 ) { + while ( off.length() > 0 ) + tcap->condList.append( off.detachFirst() ); + } + else if ( off.length() == 0 ) { + while ( on.length() > 0 ) { + CondAp *cond = on.detachFirst(); + cond->key = CondKey( cond->key.getVal() | bit ); + tcap->condList.append( cond ); + } + } + else { + if ( off.head->key.getVal() < ( on.head->key.getVal() | bit ) ) { + tcap->condList.append( off.detachFirst() ); + } + else { + CondAp *cond = on.detachFirst(); + cond->key = CondKey( cond->key.getVal() | bit ); + tcap->condList.append( cond ); + } + } + } + + if ( merge ) { + modified = true; + goto restart; + } + } + } + } + } + return modified; +} + +/* Perform minimization after an operation according + * to the command line args. */ +void FsmAp::afterOpMinimize( bool lastInSeq ) +{ + /* Switch on the prefered minimization algorithm. */ + if ( ctx->minimizeOpt == MinimizeEveryOp || ( ctx->minimizeOpt == MinimizeMostOps && lastInSeq ) ) { + /* First clean up the graph. FsmAp operations may leave these + * lying around. There should be no dead end states. The subtract + * intersection operators are the only places where they may be + * created and those operators clean them up. 
*/ + removeUnreachableStates(); + + switch ( ctx->minimizeLevel ) { + #ifdef TO_UPGRADE_CONDS + case MinimizeApprox: + minimizeApproximate(); + break; + #endif + case MinimizePartition1: + minimizePartition1(); + break; + case MinimizePartition2: + minimizePartition2(); + break; + #ifdef TO_UPGRADE_CONDS + case MinimizeStable: + minimizeStable(); + break; + #endif + } + } +} + diff --git a/ragel/fsmnfa.cc b/ragel/fsmnfa.cc new file mode 100644 index 00000000..cde4f82d --- /dev/null +++ b/ragel/fsmnfa.cc @@ -0,0 +1,660 @@ +/* + * Copyright 2015-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <iostream> + +#include "fsmgraph.h" +#include "mergesort.h" +#include "parsedata.h" + +using std::endl; + +void FsmAp::nfaFillInStates() +{ + long count = nfaList.length(); + + /* Can this lead to too many DFAs? 
Since the nfa merge is removing misfits, + * it is possible we remove a state that is on the nfa list, but we don't + * adjust count. */ + + /* Merge any states that are awaiting merging. This will likey cause + * other states to be added to the stfil list. */ + while ( nfaList.length() > 0 && count-- ) { + StateAp *state = nfaList.head; + + StateSet *stateSet = &state->stateDictEl->stateSet; + nfaMergeStates( state, stateSet->data, stateSet->length() ); + + for ( StateSet::Iter s = *stateSet; s.lte(); s++ ) + detachStateDict( state, *s ); + + nfaList.detach( state ); + } +} + +void FsmAp::prepareNfaRound() +{ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->nfaOut != 0 && ! (st->stateBits & STB_NFA_REP) ) { + StateSet set; + for ( NfaTransList::Iter to = *st->nfaOut; to.lte(); to++ ) + set.insert( to->toState ); + + st->stateDictEl = new StateDictEl( set ); + st->stateDictEl->targState = st; + stateDict.insert( st->stateDictEl ); + delete st->nfaOut; + st->nfaOut = 0; + + nfaList.append( st ); + } + } +} + +void FsmAp::finalizeNfaRound() +{ + /* For any remaining NFA states, remove from the state dict. We need to + * keep the state sets. */ + for ( NfaStateList::Iter ns = nfaList; ns.lte(); ns++ ) + stateDict.detach( ns->stateDictEl ); + + /* Disassociate non-nfa states from their state dicts. */ + for ( StateDict::Iter sdi = stateDict; sdi.lte(); sdi++ ) + sdi->targState->stateDictEl = 0; + + /* Delete the state dict elements for non-nfa states. */ + stateDict.empty(); + + /* Transfer remaining stateDictEl sets to nfaOut. */ + while ( nfaList.length() > 0 ) { + StateAp *state = nfaList.head; + state->nfaOut = new NfaTransList; + for ( StateSet::Iter ss = state->stateDictEl->stateSet; ss.lte(); ss++ ) { + /* Attach it using the NFA transitions data structure (propigates + * to output). 
*/ + NfaTrans *trans = new NfaTrans( /* 0, 0, */ 1 ); + state->nfaOut->append( trans ); + attachToNfa( state, *ss, trans ); + + detachStateDict( state, *ss ); + } + delete state->stateDictEl; + state->stateDictEl = 0; + nfaList.detach( state ); + } +} + +void FsmAp::nfaMergeStates( StateAp *destState, + StateAp **srcStates, int numSrc ) +{ + for ( int s = 0; s < numSrc; s++ ) { + mergeStates( destState, srcStates[s] ); + + while ( misfitList.length() > 0 ) { + StateAp *state = misfitList.head; + + /* Detach and delete. */ + detachState( state ); + misfitList.detach( state ); + delete state; + } + } +} + + +/* + * WRT action ordering. + * + * All the pop restore actions get the ordering ORD_RESTORE (-3), the lowest + * value defined below, to cause them to always execute first. This is the + * action that restores the state and we need that to happen before any user + * actions. + */ +const int ORD_PUSH = 0; +const int ORD_RESTORE = -3; +const int ORD_COND = -1; +const int ORD_COND2 = -2; +const int ORD_TEST = 1073741824; + +void FsmAp::transferOutToNfaTrans( NfaTrans *trans, StateAp *state ) +{ + trans->popFrom = state->fromStateActionTable; + trans->popCondSpace = state->outCondSpace; + trans->popCondKeys = state->outCondKeys; + trans->priorTable.setPriors( state->outPriorTable ); + trans->popAction.setActions( state->outActionTable ); +} + +FsmRes FsmAp::nfaWrap( FsmAp *fsm, Action *push, Action *pop, Action *init, + Action *stay, Action *exit, NfaRepeatMode mode ) +{ + /* + * First Concat. + */ + StateSet origFinals = fsm->finStateSet; + + /* Get the orig start state. */ + StateAp *origStartState = fsm->startState; + + /* New start state. */ + StateAp *newStart = fsm->addState(); + + newStart->nfaOut = new NfaTransList; + + const int orderInit = 0; + const int orderStay = mode == NfaGreedy ? 3 : 1; + const int orderExit = mode == NfaGreedy ? 1 : 3; + + NfaTrans *trans; + if ( init ) { + /* Transition into the repetition. 
Doesn't make much sense to flip this + * statically false, but provided for consistency of interface. Allows + * an init so we can have only local state manipulation. */ + trans = new NfaTrans( orderInit ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, init ); + + newStart->nfaOut->append( trans ); + fsm->attachToNfa( newStart, origStartState, trans ); + } + + StateAp *newFinal = fsm->addState(); + + for ( StateSet::Iter orig = origFinals; orig.lte(); orig++ ) { + /* For every final state, we place a new final state in front of it, + * with an NFA transition to the original. This is the "stay" choice. */ + StateAp *repl = fsm->addState(); + fsm->moveInwardTrans( repl, *orig ); + + repl->nfaOut = new NfaTransList; + + if ( stay != 0 ) { + /* Transition to original final state. Represents staying. */ + trans = new NfaTrans( orderStay ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, stay ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, *orig, trans ); + } + + if ( exit != 0 ) { + /* Transition to thew new final. Represents exiting. */ + trans = new NfaTrans( orderExit ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, exit ); + + fsm->transferOutToNfaTrans( trans, *orig ); + repl->fromStateActionTable.setActions( (*orig)->fromStateActionTable ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, newFinal, trans ); + } + + fsm->unsetFinState( *orig ); + } + + fsm->unsetStartState(); + fsm->setStartState( newStart ); + fsm->setFinState( newFinal ); + + return FsmRes( FsmRes::Fsm(), fsm ); +} + + +FsmRes FsmAp::nfaRepeatOp2( FsmAp *fsm, Action *push, Action *pop, Action *init, + Action *stay, Action *repeat, Action *exit, NfaRepeatMode mode ) +{ + /* + * First Concat. 
+ */ + StateSet origFinals = fsm->finStateSet; + + /* Get the orig start state. */ + StateAp *origStartState = fsm->startState; + StateAp *repStartState = fsm->dupStartState(); + + /* New start state. */ + StateAp *newStart1 = fsm->addState(); + StateAp *newStart2 = fsm->addState(); + + newStart1->nfaOut = new NfaTransList; + newStart2->nfaOut = new NfaTransList; + + const int orderInit = 0; + const int orderStay = mode == NfaGreedy ? 3 : 1; + const int orderRepeat = mode == NfaGreedy ? 2 : 2; + const int orderExit = mode == NfaGreedy ? 1 : 3; + + NfaTrans *trans; + if ( init ) { + /* Transition into the repetition. Doesn't make much sense to flip this + * statically false, but provided for consistency of interface. Allows + * an init so we can have only local state manipulation. */ + trans = new NfaTrans( orderInit ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, init ); + + newStart1->nfaOut->append( trans ); + fsm->attachToNfa( newStart1, newStart2, trans ); + } + + StateAp *newFinal = fsm->addState(); + + if ( exit ) { + /* Exit straight from the second start state to the new final state. + * The transition is stored in newStart2's nfaOut list, so newStart2 + * must also be the from-state passed to attachToNfa. */ + trans = new NfaTrans( orderExit ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, exit ); + + newStart2->nfaOut->append( trans ); + fsm->attachToNfa( newStart2, newFinal, trans ); + } + + if ( repeat ) { + /* Enter the machine from the second start state. As with the exit + * transition, the from-state must match the owner of the nfaOut list + * the transition was appended to. */ + trans = new NfaTrans( orderRepeat ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, repeat ); + + newStart2->nfaOut->append( trans ); + fsm->attachToNfa( newStart2, origStartState, trans ); + } + + for ( StateSet::Iter orig = origFinals; orig.lte(); orig++ ) { + /* For every final state, we place a new final state in front of it, + * with an NFA transition to the original. This is the "stay" choice. 
*/ + StateAp *repl = fsm->addState(); + fsm->moveInwardTrans( repl, *orig ); + + repl->nfaOut = new NfaTransList; + + if ( stay != 0 ) { + /* Transition to original final state. Represents staying. */ + trans = new NfaTrans( orderStay ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, stay ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, *orig, trans ); + } + + /* Transition back to the start. Represents repeat. */ + if ( repeat != 0 ) { + trans = new NfaTrans( orderRepeat ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, repeat ); + + fsm->transferOutToNfaTrans( trans, *orig ); + repl->fromStateActionTable.setActions( (*orig)->fromStateActionTable ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, repStartState, trans ); + } + + if ( exit != 0 ) { + /* Transition to thew new final. Represents exiting. */ + trans = new NfaTrans( orderExit ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, exit ); + + fsm->transferOutToNfaTrans( trans, *orig ); + repl->fromStateActionTable.setActions( (*orig)->fromStateActionTable ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, newFinal, trans ); + } + + fsm->unsetFinState( *orig ); + } + + fsm->unsetStartState(); + fsm->setStartState( newStart1 ); + fsm->setFinState( newFinal ); + + return FsmRes( FsmRes::Fsm(), fsm ); +} + + +/* This version contains the init, increment and test in the nfa pop actions. + * This is a compositional operator since it doesn't leave any actions to + * trailing characters, where they may interact with other actions that use the + * same variables. 
*/ +FsmRes FsmAp::nfaRepeatOp( FsmAp *fsm, Action *push, Action *pop, Action *init, + Action *stay, Action *repeat, Action *exit ) +{ + /* + * First Concat. + */ + StateSet origFinals = fsm->finStateSet; + + /* Get the orig start state. */ + StateAp *origStartState = fsm->startState; + StateAp *repStartState = fsm->dupStartState(); + + /* New start state. */ + StateAp *newStart = fsm->addState(); + + newStart->nfaOut = new NfaTransList; + + NfaTrans *trans; + if ( init ) { + /* Transition into the repetition. Doesn't make much sense to flip this + * statically false, but provided for consistency of interface. Allows + * an init so we can have only local state manipulation. */ + trans = new NfaTrans( 1 ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, init ); + + newStart->nfaOut->append( trans ); + fsm->attachToNfa( newStart, origStartState, trans ); + } + + StateAp *newFinal = fsm->addState(); + + for ( StateSet::Iter orig = origFinals; orig.lte(); orig++ ) { + /* For every final state, we place a new final state in front of it, + * with an NFA transition to the original. This is the "stay" choice. */ + StateAp *repl = fsm->addState(); + fsm->moveInwardTrans( repl, *orig ); + + repl->nfaOut = new NfaTransList; + + const int orderStay = 3; + const int orderRepeat = 2; + const int orderExit = 1; + + if ( stay != 0 ) { + /* Transition to original final state. Represents staying. */ + trans = new NfaTrans( orderStay ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, stay ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, *orig, trans ); + } + + /* Transition back to the start. Represents repeat. 
*/ + if ( repeat != 0 ) { + trans = new NfaTrans( orderRepeat ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, repeat ); + + fsm->transferOutToNfaTrans( trans, *orig ); + repl->fromStateActionTable.setActions( (*orig)->fromStateActionTable ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, repStartState, trans ); + } + + if ( exit != 0 ) { + /* Transition to thew new final. Represents exiting. */ + trans = new NfaTrans( orderExit ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, exit ); + + fsm->transferOutToNfaTrans( trans, *orig ); + repl->fromStateActionTable.setActions( (*orig)->fromStateActionTable ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, newFinal, trans ); + } + + fsm->unsetFinState( *orig ); + } + + fsm->unsetStartState(); + fsm->setStartState( newStart ); + fsm->setFinState( newFinal ); + + return FsmRes( FsmRes::Fsm(), fsm ); +} + + +/* Unions others with fsm. Others are deleted. */ +FsmRes FsmAp::nfaUnionOp( FsmAp *fsm, FsmAp **others, int n, int depth, ostream &stats ) +{ + /* Mark existing NFA states as NFA_REP states, which excludes them from the + * prepare NFA round. We must treat them as final NFA states and not try to + * make them deterministic. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + if ( st->nfaOut != 0 ) + st->stateBits |= STB_NFA_REP; + } + + for ( int o = 0; o < n; o++ ) { + for ( StateList::Iter st = others[o]->stateList; st.lte(); st++ ) { + if ( st->nfaOut != 0 ) + st->stateBits |= STB_NFA_REP; + } + } + + for ( int o = 0; o < n; o++ ) + assert( fsm->ctx == others[o]->ctx ); + + /* Not doing misfit accounting here. If we wanted to, it would need to be + * made nfa-compatibile. 
*/ + + /* Build a state set consisting of both start states */ + StateSet startStateSet; + startStateSet.insert( fsm->startState ); + for ( int o = 0; o < n; o++ ) + startStateSet.insert( others[o]->startState ); + + /* Both of the original start states loose their start state status. */ + fsm->unsetStartState(); + for ( int o = 0; o < n; o++ ) + others[o]->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + for ( int o = 0; o < n; o++ ) { + fsm->copyInEntryPoints( others[o] ); + others[o]->entryPoints.empty(); + } + + for ( int o = 0; o < n; o++ ) { + /* Merge the lists. This will move all the states from other + * into this. No states will be deleted. */ + fsm->stateList.append( others[o]->stateList ); + fsm->misfitList.append( others[o]->misfitList ); + // nfaList.append( others[o]->nfaList ); + } + + for ( int o = 0; o < n; o++ ) { + /* Move the final set data from other into this. */ + fsm->finStateSet.insert( others[o]->finStateSet ); + others[o]->finStateSet.empty(); + } + + for ( int o = 0; o < n; o++ ) { + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[o]; + } + + /* Create a new start state. */ + fsm->setStartState( fsm->addState() ); + + if ( depth == 0 ) { + fsm->startState->stateDictEl = new StateDictEl( startStateSet ); + fsm->nfaList.append( fsm->startState ); + + for ( StateSet::Iter s = startStateSet; s.lte(); s++ ) { + NfaTrans *trans = new NfaTrans( /* 0, 0, */ 0 ); + + if ( fsm->startState->nfaOut == 0 ) + fsm->startState->nfaOut = new NfaTransList; + + fsm->startState->nfaOut->append( trans ); + fsm->attachToNfa( fsm->startState, *s, trans ); + } + } + else { + /* Merge the start states. 
*/ + if ( fsm->ctx->printStatistics ) + stats << "nfa-fill-round\t0" << endl; + + fsm->nfaMergeStates( fsm->startState, startStateSet.data, startStateSet.length() ); + + long removed = fsm->removeUnreachableStates(); + if ( fsm->ctx->printStatistics ) + stats << "round-unreach\t" << removed << endl; + + /* Fill in any new states made from merging. */ + for ( long i = 1; i < depth; i++ ) { + if ( fsm->ctx->printStatistics ) + stats << "nfa-fill-round\t" << i << endl; + + if ( fsm->nfaList.length() == 0 ) + break; + + fsm->nfaFillInStates( ); + + long removed = fsm->removeUnreachableStates(); + if ( fsm->ctx->printStatistics ) + stats << "round-unreach\t" << removed << endl; + } + + fsm->finalizeNfaRound(); + + long maxStateSetSize = 0; + long count = 0; + for ( StateList::Iter s = fsm->stateList; s.lte(); s++ ) { + if ( s->nfaOut != 0 && s->nfaOut->length() > 0 ) { + count += 1; + if ( s->nfaOut->length() > maxStateSetSize ) + maxStateSetSize = s->nfaOut->length(); + } + } + + if ( fsm->ctx->printStatistics ) { + stats << "fill-list\t" << count << endl; + stats << "state-dict\t" << fsm->stateDict.length() << endl; + stats << "states\t" << fsm->stateList.length() << endl; + stats << "max-ss\t" << maxStateSetSize << endl; + } + + fsm->removeUnreachableStates(); + + if ( fsm->ctx->printStatistics ) + stats << "post-unreachable\t" << fsm->stateList.length() << endl; + + fsm->minimizePartition2(); + + if ( fsm->ctx->printStatistics ) { + stats << "post-min\t" << fsm->stateList.length() << std::endl; + stats << std::endl; + } + } + + return FsmRes( FsmRes::Fsm(), fsm ); +} + +FsmRes FsmAp::nfaUnion( const NfaRoundVect &roundsList, + FsmAp **machines, int numMachines, + std::ostream &stats, bool printStatistics ) +{ + long sumPlain = 0, sumMin = 0; + for ( int i = 0; i < numMachines; i++ ) { + sumPlain += machines[i]->stateList.length(); + + machines[i]->removeUnreachableStates(); + machines[i]->minimizePartition2(); + + sumMin += machines[i]->stateList.length(); + } + + 
if ( printStatistics ) { + stats << "sum-plain\t" << sumPlain << endl; + stats << "sum-minimized\t" << sumMin << endl; + } + + /* For each round. */ + for ( NfaRoundVect::Iter r = roundsList; r.lte(); r++ ) { + + if ( printStatistics ) { + stats << "depth\t" << r->depth << endl; + stats << "grouping\t" << r->groups << endl; + } + + int numGroups = 0; + int start = 0; + while ( start < numMachines ) { + /* If nfa-group-max is zero, don't group, put all terms into a single + * n-depth NFA. */ + int amount = r->groups == 0 ? numMachines : r->groups; + if ( ( start + amount ) > numMachines ) + amount = numMachines - start; + + FsmAp **others = machines + start + 1; + FsmRes res = FsmAp::nfaUnionOp( machines[start], others, (amount - 1), r->depth, stats ); + machines[start] = res.fsm; + + start += amount; + numGroups++; + } + + if ( numGroups == 1 ) + break; + + /* Move the group starts into the groups array. */ + FsmAp **groups = new FsmAp*[numGroups]; + int g = 0; + start = 0; + while ( start < numMachines ) { + groups[g] = machines[start]; + start += r->groups == 0 ? 
numMachines : r->groups; + g++; + } + + delete[] machines; + machines = groups; + numMachines = numGroups; + } + + FsmAp *ret = machines[0]; + return FsmRes( FsmRes::Fsm(), ret ); +} diff --git a/ragel/fsmstate.cc b/ragel/fsmstate.cc new file mode 100644 index 00000000..36709052 --- /dev/null +++ b/ragel/fsmstate.cc @@ -0,0 +1,600 @@ +/* + * Copyright 2002-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "fsmgraph.h" + +#include <string.h> +#include <assert.h> +#include <iostream> + +/* Construct a mark index for a specified number of states. Must new up + * an array that is states^2 in size. */ +MarkIndex::MarkIndex( int states ) : numStates(states) +{ + /* Total pairs is states^2. Actually only use half of these, but we allocate + * them all to make indexing into the array easier. 
*/ + int total = states * states; + + /* New up chars so that individual DListEl constructors are + * not called. Zero out the mem manually. */ + array = new bool[total]; + memset( array, 0, sizeof(bool) * total ); +} + +/* Free the array used to store state pairs. */ +MarkIndex::~MarkIndex() +{ + delete[] array; +} + +/* Mark a pair of states. States are specified by their number. The + * marked states are moved from the unmarked list to the marked list. */ +void MarkIndex::markPair(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + array[pos] = true; +} + +/* Returns true if the pair of states are marked. Returns false otherwise. + * Ordering of states given does not matter. */ +bool MarkIndex::isPairMarked(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + return array[pos]; +} + +/* Create a new fsm state. State has not out transitions or in transitions, not + * out out transition data and not number. */ +StateAp::StateAp() +: + /* No out or in transitions. */ + outList(), + inTrans(), + inCond(), + + /* No EOF target. */ + eofTarget(0), + + /* No entry points, or epsilon trans. */ + entryIds(), + epsilonTrans(), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* Only used during merging. Normally null. */ + stateDictEl(0), + stateDictIn(0), + + nfaOut(0), + nfaIn(0), + + eptVect(0), + + /* No state identification bits. */ + stateBits(0), + + /* No Priority data. */ + outPriorTable(), + + /* No Action data. */ + toStateActionTable(), + fromStateActionTable(), + outActionTable(), + outCondSpace(0), + outCondKeys(), + errActionTable(), + eofActionTable(), + guardedInTable(), + lmNfaParts() +{ +} + +/* Copy everything except actual the transitions. That is left up to the + * FsmAp copy constructor. */ +StateAp::StateAp(const StateAp &other) +: + /* All lists are cleared. 
They will be filled in when the + * individual transitions are duplicated and attached. */ + outList(), + inTrans(), + inCond(), + + /* Set this using the original state's eofTarget. It will get mapped back + * to the new machine in the Fsm copy constructor. */ + eofTarget(other.eofTarget), + + /* Duplicate the entry id set and epsilon transitions. These + * are sets of integers and as such need no fixing. */ + entryIds(other.entryIds), + epsilonTrans(other.epsilonTrans), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* This is only used during merging. Normally null. */ + stateDictEl(0), + stateDictIn(0), + + nfaOut(0), + nfaIn(0), + + eptVect(0), + + /* Fsm state data. */ + stateBits(other.stateBits), + + /* Copy in priority data. */ + outPriorTable(other.outPriorTable), + + /* Copy in action data. */ + toStateActionTable(other.toStateActionTable), + fromStateActionTable(other.fromStateActionTable), + outActionTable(other.outActionTable), + outCondSpace(other.outCondSpace), + outCondKeys(other.outCondKeys), + errActionTable(other.errActionTable), + eofActionTable(other.eofActionTable), + + guardedInTable(other.guardedInTable), + lmNfaParts(other.lmNfaParts) +{ + /* Duplicate all the transitions. */ + for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + /* Duplicate and store the orginal target in the transition. This will + * be corrected once all the states have been created. */ + TransDataAp *newTrans = new TransDataAp( *trans->tdap() ); + assert( trans->tdap()->lmActionTable.length() == 0 ); + newTrans->toState = trans->tdap()->toState; + outList.append( newTrans ); + } + else { + /* Duplicate and store the orginal target in the transition. This will + * be corrected once all the states have been created. 
*/ + TransAp *newTrans = new TransCondAp( *trans->tcap() ); + + for ( CondList::Iter cti = trans->tcap()->condList; cti.lte(); cti++ ) { + CondAp *newCondTrans = new CondAp( *cti, newTrans ); + newCondTrans->key = cti->key; + + newTrans->tcap()->condList.append( newCondTrans ); + + assert( cti->lmActionTable.length() == 0 ); + + newCondTrans->toState = cti->toState; + } + + outList.append( newTrans ); + } + } + + /* Dup the nfa trans. */ + if ( other.nfaOut != 0 ) { + nfaOut = new NfaTransList; + for ( NfaTransList::Iter trans = *other.nfaOut; trans.lte(); trans++ ) { + NfaTrans *newtrans = new NfaTrans( *trans ); + newtrans->toState = trans->toState; + + nfaOut->append( newtrans ); + } + } +} + +/* If there is a state dict element, then delete it. Everything else is left + * up to the FsmGraph destructor. */ +StateAp::~StateAp() +{ + if ( stateDictEl != 0 ) + delete stateDictEl; + + if ( stateDictIn != 0 ) + delete stateDictIn; + + if ( nfaIn != 0 ) + delete nfaIn; + + if ( nfaOut != 0 ) { + nfaOut->empty(); + delete nfaOut; + } +} + +#ifdef TO_UPGRADE_CONDS +/* Compare two states using pointers to the states. With the approximate + * compare, the idea is that if the compare finds them the same, they can + * immediately be merged. */ +int ApproxCompare::compare( const StateAp *state1, const StateAp *state2 ) +{ + int compareRes; + + /* Test final state status. */ + if ( (state1->stateBits & STB_ISFINAL) && !(state2->stateBits & STB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & STB_ISFINAL) && (state2->stateBits & STB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. */ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmAp::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to get the transition pairs. 
*/ + RangePairIter<TransAp> outPair( ctx, state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangePairIter<TransAp>::RangeInS1: + compareRes = FsmAp::compareFullPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIter<TransAp>::RangeInS2: + compareRes = FsmAp::compareFullPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIter<TransAp>::RangeOverlap: + compareRes = FsmAp::compareFullPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIter<TransAp>::BreakS1: + case RangePairIter<TransAp>::BreakS2: + break; + } + } + + /* Check EOF targets. */ + if ( state1->eofTarget < state2->eofTarget ) + return -1; + else if ( state1->eofTarget > state2->eofTarget ) + return 1; + + /* Order by guarded-in status only when exactly one of the two states + * has it. Both conditions must hold together; joining them with || made + * one of the two branches fire for every pair, so the states could + * never compare equal. + * NOTE(review): the StateAp constructors in this file initialize + * guardedInTable and no guardedIn member; confirm the member name + * before enabling TO_UPGRADE_CONDS. */ + if ( state1->guardedIn && !state2->guardedIn ) + return -1; + else if ( !state1->guardedIn && state2->guardedIn ) + return 1; + + /* Got through the entire state comparison, deem them equal. */ + return 0; +} +#endif + + +/* Compare class used in the initial partition. */ +int InitPartitionCompare::compare( const StateAp *state1, const StateAp *state2 ) +{ + int compareRes; + + if ( state1->nfaOut == 0 && state2->nfaOut != 0 ) + return -1; + else if ( state1->nfaOut != 0 && state2->nfaOut == 0 ) + return 1; + else if ( state1->nfaOut != 0 ) { + compareRes = CmpNfaTransList::compare( + *state1->nfaOut, *state2->nfaOut ); + if ( compareRes != 0 ) + return compareRes; + } + + /* Test final state status. */ + if ( (state1->stateBits & STB_ISFINAL) && !(state2->stateBits & STB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & STB_ISFINAL) && (state2->stateBits & STB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. 
*/ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmAp::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to test the transition pairs. */ + RangePairIter< PiList<TransAp> > + outPair( ctx, state1->outList, state2->outList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangePairIter<TransAp>::RangeInS1: + compareRes = FsmAp::compareTransDataPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIter<TransAp>::RangeInS2: + compareRes = FsmAp::compareTransDataPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIter<TransAp>::RangeOverlap: + compareRes = FsmAp::compareTransDataPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIter<TransAp>::BreakS1: + case RangePairIter<TransAp>::BreakS2: + break; + } + } + + return 0; +} + +/* Compare class for the sort that does the partitioning. */ +int PartitionCompare::compare( const StateAp *state1, const StateAp *state2 ) +{ + int compareRes; + + /* Use a pair iterator to get the transition pairs. 
*/ + RangePairIter< PiList<TransAp> > outPair( ctx, state1->outList, state2->outList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangePairIter<TransAp>::RangeInS1: + compareRes = FsmAp::compareTransPartPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIter<TransAp>::RangeInS2: + compareRes = FsmAp::compareTransPartPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIter<TransAp>::RangeOverlap: + compareRes = FsmAp::compareTransPartPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIter<TransAp>::BreakS1: + case RangePairIter<TransAp>::BreakS2: + break; + } + } + + /* Test eof targets. */ + if ( state1->eofTarget == 0 && state2->eofTarget != 0 ) + return -1; + else if ( state1->eofTarget != 0 && state2->eofTarget == 0 ) + return 1; + else if ( state1->eofTarget != 0 ) { + /* Both eof targets are set. */ + compareRes = CmpOrd< MinPartition* >::compare( + state1->eofTarget->alg.partition, state2->eofTarget->alg.partition ); + if ( compareRes != 0 ) + return compareRes; + } + + return 0; +} + +#ifdef TO_UPGRADE_CONDS +/* Compare class for the sort that does the partitioning. */ +bool MarkCompare::shouldMark( MarkIndex &markIndex, const StateAp *state1, + const StateAp *state2 ) +{ + /* Use a pair iterator to get the transition pairs. 
*/ + RangePairIter<TransAp> outPair( ctx, state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangePairIter<TransAp>::RangeInS1: + if ( FsmAp::shouldMarkPtr( markIndex, outPair.s1Tel.trans, 0 ) ) + return true; + break; + + case RangePairIter<TransAp>::RangeInS2: + if ( FsmAp::shouldMarkPtr( markIndex, 0, outPair.s2Tel.trans ) ) + return true; + break; + + case RangePairIter<TransAp>::RangeOverlap: + if ( FsmAp::shouldMarkPtr( markIndex, + outPair.s1Tel.trans, outPair.s2Tel.trans ) ) + return true; + break; + + case RangePairIter<TransAp>::BreakS1: + case RangePairIter<TransAp>::BreakS2: + break; + } + } + + return false; +} +#endif + +/* + * Transition Comparison. + */ + +int FsmAp::comparePart( TransAp *trans1, TransAp *trans2 ) +{ + if ( trans1->plain() ) { + int compareRes = FsmAp::compareCondPartPtr( trans1->tdap(), trans2->tdap() ); + if ( compareRes != 0 ) + return compareRes; + } + else { + /* Use a pair iterator to get the transition pairs. */ + ValPairIter< PiList<CondAp> > outPair( trans1->tcap()->condList, + trans2->tcap()->condList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case ValPairIter<CondAp>::RangeInS1: { + int compareRes = FsmAp::compareCondPartPtr<CondAp>( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + } + + case ValPairIter<CondAp>::RangeInS2: { + int compareRes = FsmAp::compareCondPartPtr<CondAp>( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + } + + case ValPairIter<CondAp>::RangeOverlap: { + int compareRes = FsmAp::compareCondPartPtr<CondAp>( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + }} + } + } + + return 0; +} + +/* Compare target partitions. Either pointer may be null. 
*/ +int FsmAp::compareTransPartPtr( TransAp *trans1, TransAp *trans2 ) +{ + if ( trans1 != 0 ) { + /* If trans1 is set then so should trans2. The initial partitioning + * guarantees this for us. */ + return comparePart( trans1, trans2 ); + } + + return 0; +} + +template< class Trans > int FsmAp::compareCondPartPtr( Trans *trans1, Trans *trans2 ) +{ + if ( trans1 != 0 ) { + /* If trans1 is set then so should trans2. The initial partitioning + * guarantees this for us. */ + if ( trans1->toState == 0 && trans2->toState != 0 ) + return -1; + else if ( trans1->toState != 0 && trans2->toState == 0 ) + return 1; + else if ( trans1->toState != 0 ) { + /* Both of targets are set. */ + return CmpOrd< MinPartition* >::compare( + trans1->toState->alg.partition, trans2->toState->alg.partition ); + } + } + return 0; +} + + +/* Compares two transition pointers according to priority and functions. + * Either pointer may be null. Does not consider to state or from state. */ +int FsmAp::compareTransDataPtr( TransAp *trans1, TransAp *trans2 ) +{ + if ( trans1 == 0 && trans2 != 0 ) + return -1; + else if ( trans1 != 0 && trans2 == 0 ) + return 1; + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + return 0; +} + +#ifdef TO_UPGRADE_CONDS +/* Compares two transitions according to target state, priority and functions. + * Does not consider from state. Either of the pointers may be null. */ +int FsmAp::compareFullPtr( TransAp *trans1, TransAp *trans2 ) +{ + /* << "FIXME: " << __PRETTY_FUNCTION__ << std::endl; */ + + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. */ + if ( trans1 != 0 ) + return -1; + else + return 1; + } + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. Test target state, + * priority and funcs. 
*/ + if ( tai(trans1)->tcap()->condList.head->toState < tai(trans2)->tcap()->condList.head->toState ) + return -1; + else if ( tai(trans1)->tcap()->condList.head->toState > tai(trans2)->tcap()->condList.head->toState ) + return 1; + else if ( tai(trans1)->tcap()->condList.head->toState != 0 ) { + /* Test transition data. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + } + return 0; +} +#endif + +#ifdef TO_UPGRADE_CONDS +bool FsmAp::shouldMarkPtr( MarkIndex &markIndex, TransAp *trans1, + TransAp *trans2 ) +{ + /* << "FIXME: " << __PRETTY_FUNCTION__ << std::endl; */ + + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. The initial mark round + * should rule out this case. */ + assert( false ); + } + else if ( trans1 != 0 ) { + /* Both of the transitions are set. If the target pair is marked, then + * the pair we are considering gets marked. */ + return markIndex.isPairMarked( tai(trans1)->tcap()->condList.head->toState->alg.stateNum, + tai(trans2)->tcap()->condList.head->toState->alg.stateNum ); + } + + /* Neither of the transitiosn are set. */ + return false; +} +#endif diff --git a/ragel/gendata.cc b/ragel/gendata.cc new file mode 100644 index 00000000..c44f7049 --- /dev/null +++ b/ragel/gendata.cc @@ -0,0 +1,1733 @@ +/* + * Copyright 2005-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "gendata.h" +#include "ragel.h" +#include "parsedata.h" +#include "fsmgraph.h" +#include "inputdata.h" +#include "version.h" + +#include <string.h> +#include <iostream> + +string itoa( int i ) +{ + char buf[16]; + sprintf( buf, "%i", i ); + return buf; +} + +void openHostBlock( char opener, InputData *id, ostream &out, const char *fileName, int line ) +{ + out << "host( \""; + for ( const char *pc = fileName; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } + out << "\", " << line << " ) " << opener << "{"; +} + +void Reducer::appendTrans( TransListVect &outList, Key lowKey, + Key highKey, TransAp *trans ) +{ + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 || trans->tdap()->actionTable.length() > 0 ) + outList.append( TransEl( lowKey, highKey, trans ) ); + } + else { + /* Add once if any cond has a to-state or an action table. */ + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 || cond->actionTable.length() > 0 ) { + outList.append( TransEl( lowKey, highKey, trans ) ); + break; + } + } + } +} + +void Reducer::reduceActionTables() +{ + /* Reduce the actions tables to a set. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + RedActionTable *actionTable = 0; + + /* Reduce To State Actions. 
*/ + if ( st->toStateActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Reduce From State Actions. */ + if ( st->fromStateActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Reduce EOF actions. */ + if ( st->eofActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->eofActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Loop the transitions and reduce their actions. */ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->actionTable.length() > 0 ) { + if ( actionTableMap.insert( trans->tdap()->actionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->actionTable.length() > 0 ) { + if ( actionTableMap.insert( cond->actionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + } + } + } + + if ( st->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *st->nfaOut; n.lte(); n++ ) { + if ( actionTableMap.insert( n->pushTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + + if ( actionTableMap.insert( n->restoreTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + + if ( actionTableMap.insert( n->popAction, &actionTable ) ) + actionTable->id = nextActionTableId++; + + if ( actionTableMap.insert( n->popTest, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + } + } +} + + +void Reducer::makeText( GenInlineList *outList, InlineItem *item ) +{ + GenInlineItem *inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Text ); + inlineItem->data = item->data; + + outList->append( inlineItem ); +} + +void Reducer::makeTargetItem( GenInlineList *outList, NameInst *nameTarg, + GenInlineItem::Type type ) +{ + 
long targetState; + if ( fsmCtx->generatingSectionSubset ) + targetState = -1; + else { + EntryMapEl *targ = fsm->entryPoints.find( nameTarg->id ); + targetState = targ->value->alg.stateNum; + } + + /* Make the item. */ + GenInlineItem *inlineItem = new GenInlineItem( InputLoc(), type ); + inlineItem->targId = targetState; + outList->append( inlineItem ); +} + + +void Reducer::makeSubList( GenInlineList *outList, const InputLoc &loc, + InlineList *inlineList, GenInlineItem::Type type ) +{ + /* Fill the sub list. */ + GenInlineList *subList = new GenInlineList; + makeGenInlineList( subList, inlineList ); + + /* Make the item. */ + GenInlineItem *inlineItem = new GenInlineItem( loc, type ); + inlineItem->children = subList; + outList->append( inlineItem ); +} + +/* Make a sublist item with a given type. */ +void Reducer::makeSubList( GenInlineList *outList, + InlineList *inlineList, GenInlineItem::Type type ) +{ + makeSubList( outList, InputLoc(), inlineList, type ); +} + +void Reducer::makeLmOnLast( GenInlineList *outList, InlineItem *item ) +{ + makeSetTokend( outList, 1 ); + + if ( item->longestMatchPart->action != 0 ) { + Action *action = item->longestMatchPart->action; + makeSubList( outList, action->loc, action->inlineList, + GenInlineItem::HostStmt ); + } +} + +void Reducer::makeLmOnNext( GenInlineList *outList, InlineItem *item ) +{ + makeSetTokend( outList, 0 ); + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::LmHold ) ); + + if ( item->longestMatchPart->action != 0 ) { + Action *action = item->longestMatchPart->action; + makeSubList( outList, action->loc, action->inlineList, + GenInlineItem::HostStmt ); + } +} + +void Reducer::makeExecGetTokend( GenInlineList *outList ) +{ + /* Make the Exec item. 
*/ + GenInlineItem *execItem = new GenInlineItem( InputLoc(), GenInlineItem::LmExec ); + execItem->children = new GenInlineList; + + /* Make the GetTokEnd */ + GenInlineItem *getTokend = new GenInlineItem( InputLoc(), GenInlineItem::LmGetTokEnd ); + execItem->children->append( getTokend ); + + outList->append( execItem ); +} + +void Reducer::makeLmOnLagBehind( GenInlineList *outList, InlineItem *item ) +{ + /* Jump to the tokend. */ + makeExecGetTokend( outList ); + + if ( item->longestMatchPart->action != 0 ) { + Action *action = item->longestMatchPart->action; + makeSubList( outList, action->loc, action->inlineList, + GenInlineItem::HostStmt ); + } +} + +void Reducer::makeLmSwitch( GenInlineList *outList, InlineItem *item ) +{ + GenInlineItem *lmSwitch = new GenInlineItem( InputLoc(), GenInlineItem::LmSwitch ); + GenInlineList *lmList = lmSwitch->children = new GenInlineList; + LongestMatch *longestMatch = item->longestMatch; + + /* We can't put the <exec> here because we may need to handle the error + * case and in that case p should not be changed. Instead use a default + * label in the switch to adjust p when user actions are not set. An id of + * -1 indicates the default. */ + + if ( longestMatch->lmSwitchHandlesError ) { + /* If the switch handles error then we should have also forced the + * error state. */ + assert( fsm->errState != 0 ); + + GenInlineItem *errCase = new GenInlineItem( InputLoc(), GenInlineItem::HostStmt ); + errCase->lmId = 0; + errCase->children = new GenInlineList; + + GenInlineItem *host = new GenInlineItem( item->loc, GenInlineItem::HostStmt ); + host->children = new GenInlineList; + errCase->children->append( host ); + + /* Make the item. This should probably be an LM goto, would eliminate + * need for wrapping in host statement. 
.*/ + GenInlineItem *gotoItem = new GenInlineItem( InputLoc(), GenInlineItem::Goto ); + gotoItem->targId = fsm->errState->alg.stateNum; + host->children->append( gotoItem ); + + lmList->append( errCase ); + } + + bool needDefault = false; + for ( LmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) { + if ( lmi->inLmSelect ) { + if ( lmi->action == 0 ) + needDefault = true; + else { + /* Open the action. Write it with the context that sets up _p + * when doing control flow changes from inside the machine. */ + GenInlineItem *lmCase = new GenInlineItem( InputLoc(), GenInlineItem::LmCase ); + lmCase->lmId = lmi->longestMatchId; + lmCase->children = new GenInlineList; + + makeExecGetTokend( lmCase->children ); + + GenInlineItem *subHost = new GenInlineItem( lmi->action->loc, + GenInlineItem::HostStmt ); + subHost->children = new GenInlineList; + makeGenInlineList( subHost->children, lmi->action->inlineList ); + lmCase->children->append( subHost ); + + lmList->append( lmCase ); + } + } + } + + if ( needDefault ) { + GenInlineItem *defCase = new GenInlineItem( item->loc, GenInlineItem::HostStmt ); + defCase->lmId = -1; + defCase->children = new GenInlineList; + + makeExecGetTokend( defCase->children ); + + lmList->append( defCase ); + } + + outList->append( lmSwitch ); +} + +void Reducer::makeLmNfaOnNext( GenInlineList *outList, InlineItem *item ) +{ + makeSetTokend( outList, 0 ); + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::LmHold ) ); + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::NfaClear ) ); + + if ( item->longestMatchPart->action != 0 ) { + Action *action = item->longestMatchPart->action; + makeSubList( outList, action->loc, action->inlineList, + GenInlineItem::HostStmt ); + } +} + +void Reducer::makeLmNfaOnEof( GenInlineList *outList, InlineItem *item ) +{ + makeSetTokend( outList, 0 ); + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::NfaClear ) ); + + if ( item->longestMatchPart->action 
!= 0 ) { + Action *action = item->longestMatchPart->action; + makeSubList( outList, action->loc, action->inlineList, + GenInlineItem::HostStmt ); + } +} + +void Reducer::makeLmNfaOnLast( GenInlineList *outList, InlineItem *item ) +{ + makeSetTokend( outList, 1 ); + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::NfaClear ) ); + + if ( item->longestMatchPart->action != 0 ) { + Action *action = item->longestMatchPart->action; + makeSubList( outList, action->loc, action->inlineList, + GenInlineItem::HostStmt ); + } +} + + +void Reducer::makeSetTokend( GenInlineList *outList, long offset ) +{ + GenInlineItem *inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmSetTokEnd ); + inlineItem->offset = offset; + outList->append( inlineItem ); +} + +void Reducer::makeSetAct( GenInlineList *outList, long lmId ) +{ + GenInlineItem *inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmSetActId ); + inlineItem->lmId = lmId; + outList->append( inlineItem ); +} + +void Reducer::makeGenInlineList( GenInlineList *outList, InlineList *inList ) +{ + for ( InlineList::Iter item = *inList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Text: + makeText( outList, item ); + break; + case InlineItem::Goto: + makeTargetItem( outList, item->nameTarg, GenInlineItem::Goto ); + break; + case InlineItem::GotoExpr: + makeSubList( outList, item->children, GenInlineItem::GotoExpr ); + break; + case InlineItem::Call: + makeTargetItem( outList, item->nameTarg, GenInlineItem::Call ); + break; + case InlineItem::CallExpr: + makeSubList( outList, item->children, GenInlineItem::CallExpr ); + break; + case InlineItem::Ncall: + makeTargetItem( outList, item->nameTarg, GenInlineItem::Ncall ); + break; + case InlineItem::NcallExpr: + makeSubList( outList, item->children, GenInlineItem::NcallExpr ); + break; + case InlineItem::Next: + makeTargetItem( outList, item->nameTarg, GenInlineItem::Next ); + break; + case InlineItem::NextExpr: + makeSubList( outList, 
item->children, GenInlineItem::NextExpr ); + break; + case InlineItem::Break: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Break ) ); + break; + case InlineItem::Nbreak: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Nbreak ) ); + break; + case InlineItem::Ret: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Ret ) ); + break; + case InlineItem::Nret: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Nret ) ); + break; + case InlineItem::PChar: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::PChar ) ); + break; + case InlineItem::Char: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Char ) ); + break; + case InlineItem::Curs: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Curs ) ); + break; + case InlineItem::Targs: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Targs ) ); + break; + case InlineItem::Entry: + makeTargetItem( outList, item->nameTarg, GenInlineItem::Entry ); + break; + + case InlineItem::Hold: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Hold ) ); + break; + case InlineItem::Exec: + makeSubList( outList, item->children, GenInlineItem::Exec ); + break; + + case InlineItem::LmSetActId: + makeSetAct( outList, item->longestMatchPart->longestMatchId ); + break; + case InlineItem::LmSetTokEnd: + makeSetTokend( outList, 1 ); + break; + + case InlineItem::LmOnLast: + makeLmOnLast( outList, item ); + break; + case InlineItem::LmOnNext: + makeLmOnNext( outList, item ); + break; + case InlineItem::LmOnLagBehind: + makeLmOnLagBehind( outList, item ); + break; + case InlineItem::LmSwitch: + makeLmSwitch( outList, item ); + break; + + case InlineItem::LmNfaOnLast: + makeLmNfaOnLast( outList, item ); + break; + case InlineItem::LmNfaOnNext: + makeLmNfaOnNext( outList, item ); + break; + case InlineItem::LmNfaOnEof: + makeLmNfaOnEof( outList, item ); + break; + + case InlineItem::LmInitAct: + outList->append( 
new GenInlineItem( InputLoc(), GenInlineItem::LmInitAct ) ); + break; + case InlineItem::LmInitTokStart: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::LmInitTokStart ) ); + break; + case InlineItem::LmSetTokStart: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::LmSetTokStart ) ); + hasLongestMatch = true; + break; + case InlineItem::Stmt: + makeSubList( outList, item->children, GenInlineItem::GenStmt ); + break; + case InlineItem::Subst: { + /* Find the subst action. */ + Action *subst = curInlineAction->argList->data[item->substPos]; + makeGenInlineList( outList, subst->inlineList ); + break; + } + case InlineItem::NfaWrapAction: { + GenAction *wrap = allActions + item->wrappedAction->actionId; + GenInlineItem *gii = new GenInlineItem( InputLoc(), + GenInlineItem::NfaWrapAction ); + gii->wrappedAction = wrap; + outList->append( gii ); + break; + } + case InlineItem::NfaWrapConds: { + GenCondSpace *condSpace = allCondSpaces + item->condSpace->condSpaceId; + + GenInlineItem *gii = new GenInlineItem( InputLoc(), + GenInlineItem::NfaWrapConds ); + gii->condSpace = condSpace; + gii->condKeySet = item->condKeySet; + outList->append( gii ); + break; + }} + } +} + +void Reducer::makeExports() +{ + for ( ExportList::Iter exp = fsmCtx->exportList; exp.lte(); exp++ ) + exportList.append( new Export( exp->name, exp->key ) ); +} + +void Reducer::makeAction( Action *action ) +{ + GenInlineList *genList = new GenInlineList; + + curInlineAction = action; + makeGenInlineList( genList, action->inlineList ); + curInlineAction = 0; + + newAction( curAction++, action->name, action->loc, genList ); +} + + +void Reducer::makeActionList() +{ + /* Determine which actions to write. */ + int nextActionId = 0; + for ( ActionList::Iter act = fsmCtx->actionList; act.lte(); act++ ) { + if ( act->numRefs() > 0 || act->numCondRefs > 0 ) + act->actionId = nextActionId++; + } + + /* Write the list. 
*/ + initActionList( nextActionId ); + curAction = 0; + + for ( ActionList::Iter act = fsmCtx->actionList; act.lte(); act++ ) { + if ( act->actionId >= 0 ) + makeAction( act ); + } +} + +void Reducer::makeActionTableList() +{ + /* Must first order the action tables based on their id. */ + int numTables = nextActionTableId; + RedActionTable **tables = new RedActionTable*[numTables]; + for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ ) + tables[at->id] = at; + + initActionTableList( numTables ); + curActionTable = 0; + + for ( int t = 0; t < numTables; t++ ) { + long length = tables[t]->key.length(); + + /* Collect the action table. */ + RedAction *redAct = allActionTables + curActionTable; + redAct->actListId = curActionTable; + redAct->key.setAsNew( length ); + + for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) { + redAct->key[atel.pos()].key = 0; + redAct->key[atel.pos()].value = allActions + + atel->value->actionId; + } + + /* Insert into the action table map. */ + redFsm->actionMap.insert( redAct ); + + curActionTable += 1; + } + + delete[] tables; +} + +void Reducer::makeConditions() +{ + if ( fsm->ctx->condData->condSpaceMap.length() > 0 ) { + /* Allocate condition space ids. */ + long nextCondSpaceId = 0; + for ( CondSpaceMap::Iter cs = fsm->ctx->condData->condSpaceMap; cs.lte(); cs++ ) + cs->condSpaceId = nextCondSpaceId++; + + /* Allocate the array of conditions and put them on the list. */ + long length = fsm->ctx->condData->condSpaceMap.length(); + allCondSpaces = new GenCondSpace[length]; + for ( long c = 0; c < length; c++ ) + condSpaceList.append( &allCondSpaces[c] ); + + long curCondSpace = 0; + for ( CondSpaceMap::Iter cs = fsm->ctx->condData->condSpaceMap; cs.lte(); cs++ ) { + /* Transfer the id. 
*/ + allCondSpaces[curCondSpace].condSpaceId = cs->condSpaceId; + + curCondSpace += 1; + } + } + + makeActionList(); + makeActionTableList(); + + if ( fsm->ctx->condData->condSpaceMap.length() > 0 ) { + long curCondSpace = 0; + for ( CondSpaceMap::Iter cs = fsm->ctx->condData->condSpaceMap; cs.lte(); cs++ ) { + for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) + condSpaceItem( curCondSpace, (*csi)->actionId ); + curCondSpace += 1; + } + } +} + +bool Reducer::makeNameInst( std::string &res, NameInst *nameInst ) +{ + bool written = false; + if ( nameInst->parent != 0 ) + written = makeNameInst( res, nameInst->parent ); + + if ( !nameInst->name.empty() ) { + if ( written ) + res += '_'; + res += nameInst->name; + written = true; + } + + return written; +} + +void Reducer::makeEntryPoints() +{ + /* List of entry points other than start state. */ + if ( fsm->entryPoints.length() > 0 || fsmCtx->lmRequiresErrorState ) { + if ( fsmCtx->lmRequiresErrorState ) + setForcedErrorState(); + + for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { + /* Get the name instantiation from nameIndex. 
*/ + NameInst *nameInst = fsmCtx->nameIndex[en->key]; + std::string name; + makeNameInst( name, nameInst ); + StateAp *state = en->value; + addEntryPoint( strdup(name.c_str()), state->alg.stateNum ); + } + } +} + +void Reducer::makeStateActions( StateAp *state ) +{ + RedActionTable *toStateActions = 0; + if ( state->toStateActionTable.length() > 0 ) + toStateActions = actionTableMap.find( state->toStateActionTable ); + + RedActionTable *fromStateActions = 0; + if ( state->fromStateActionTable.length() > 0 ) + fromStateActions = actionTableMap.find( state->fromStateActionTable ); + + if ( toStateActions != 0 || fromStateActions != 0 ) { + long to = -1; + if ( toStateActions != 0 ) + to = toStateActions->id; + + long from = -1; + if ( fromStateActions != 0 ) + from = fromStateActions->id; + + setStateActions( curState, to, from, -1 ); + } +} + +void Reducer::makeTrans( Key lowKey, Key highKey, TransAp *trans ) +{ + RedCondEl *outConds; + int numConds; + + assert( ( allStates + curState ) != redFsm->errState ); + + if ( trans->plain() ) { + long targ = -1; + long action = -1; + + /* First reduce the action. */ + RedActionTable *actionTable = 0; + if ( trans->tdap()->actionTable.length() > 0 ) + actionTable = actionTableMap.find( trans->tdap()->actionTable ); + + if ( trans->tdap()->toState != 0 ) + targ = trans->tdap()->toState->alg.stateNum; + + if ( actionTable != 0 ) + action = actionTable->id; + + /* Make the new transitions. */ + RedStateAp *targState = targ >= 0 ? (allStates + targ) : redFsm->getErrorState(); + RedAction *at = action >= 0 ? (allActionTables + action) : 0; + + RedTransAp *trans = redFsm->allocateTrans( targState, at ); + newTrans( allStates + curState, lowKey, highKey, trans ); + } + else { + numConds = trans->tcap()->condList.length(); + outConds = new RedCondEl[numConds]; + int pos = 0; + for ( CondList::Iter cti = trans->tcap()->condList; cti.lte(); cti++, pos++ ) { + long targ = -1; + long action = -1; + + /* First reduce the action. 
*/ + RedActionTable *actionTable = 0; + if ( cti->actionTable.length() > 0 ) + actionTable = actionTableMap.find( cti->actionTable ); + + if ( cti->toState != 0 ) + targ = cti->toState->alg.stateNum; + + if ( actionTable != 0 ) + action = actionTable->id; + + /* Make the new transitions. */ + RedStateAp *targState = targ >= 0 ? (allStates + targ) : redFsm->getErrorState(); + RedAction *at = action >= 0 ? (allActionTables + action) : 0; + RedCondAp *cond = redFsm->allocateCond( targState, at ); + + outConds[pos].key = cti->key; + outConds[pos].value = cond; + } + + GenCondSpace *condSpace = allCondSpaces + trans->condSpace->condSpaceId; + + /* If the cond list is not full then we need an error cond. */ + RedCondAp *errCond = 0; + if ( numConds < ( 1 << condSpace->condSet.length() ) ) + errCond = redFsm->getErrorCond(); + + RedTransAp *trans = redFsm->allocateTrans( + condSpace, outConds, numConds, errCond ); + + newTrans( allStates + curState, lowKey, highKey, trans ); + } +} + +void Reducer::makeEofTrans( StateAp *state ) +{ + /* EOF actions go out here only if the state has no eof target. If it has + * an eof target then an eof transition will be used instead. 
*/ + RedActionTable *eofActions = 0; + if ( state->eofActionTable.length() > 0 ) + eofActions = actionTableMap.find( state->eofActionTable ); + + /* Add an EOF transition if we have conditions, a target, or actions, */ + if ( state->outCondSpace != 0 || state->eofTarget != 0 || eofActions != 0 ) + redFsm->bAnyEofActivity = true; + + long targ = state->alg.stateNum; + long action = -1; + + if ( state->eofTarget != 0 ) + targ = state->eofTarget->alg.stateNum; + + if ( eofActions != 0 ) + action = eofActions->id; + + + if ( state->outCondSpace == 0 ) { + // std::cerr << "setEofTrans( " << + // state->alg.stateNum << ", " << targ << ", " << action << " );" << endl; + + setEofTrans( state->alg.stateNum, targ, action ); + } + else { + int numConds = state->outCondKeys.length(); + RedCondEl *outConds = new RedCondEl[numConds]; + for ( int pos = 0; pos < numConds; pos++ ) { + /* Make the new transitions. */ + RedStateAp *targState = targ >= 0 ? (allStates + targ) : redFsm->getErrorState(); + RedAction *at = action >= 0 ? (allActionTables + action) : 0; + RedCondAp *cond = redFsm->allocateCond( targState, at ); + + outConds[pos].key = state->outCondKeys[pos]; + outConds[pos].value = cond; + } + + GenCondSpace *condSpace = allCondSpaces + state->outCondSpace->condSpaceId; + + /* If the cond list is not full then we need an error cond. */ + RedCondAp *errCond = 0; + if ( numConds < ( 1 << condSpace->condSet.length() ) ) + errCond = redFsm->getErrorCond(); + + setEofTrans( state->alg.stateNum, condSpace, outConds, numConds, errCond ); + } +} + + +void Reducer::makeTransList( StateAp *state ) +{ + TransListVect outList; + + /* If there is only are no ranges the task is simple. */ + if ( state->outList.length() > 0 ) { + /* Loop each source range. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* Reduce the transition. If it reduced to anything then add it. 
*/ + appendTrans( outList, trans->lowKey, trans->highKey, trans ); + } + } + + initTransList( curState, outList.length() ); + + for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ ) + makeTrans( tvi->lowKey, tvi->highKey, tvi->value ); + + finishTransList( curState ); +} + +void Reducer::makeStateList() +{ + /* Write the list of states. */ + long length = fsm->stateList.length(); + initStateList( length ); + curState = 0; + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + makeStateActions( st ); + makeEofTrans( st ); + makeTransList( st ); + + long id = st->alg.stateNum; + setId( curState, id ); + + if ( st->isFinState() ) + setFinal( curState ); + + if ( st->nfaOut != 0 ) { + RedStateAp *from = allStates + curState; + from->nfaTargs = new RedNfaTargs; + for ( NfaTransList::Iter targ = *st->nfaOut; targ.lte(); targ++ ) { + RedStateAp *rtarg = allStates + targ->toState->alg.stateNum; + + RedAction *pushRa = 0; + RedAction *popTestRa = 0; + + if ( targ->pushTable.length() > 0 ) { + RedActionTable *pushActions = + actionTableMap.find( targ->pushTable ); + pushRa = allActionTables + pushActions->id; + } + + if ( targ->popTest.length() > 0 ) { + RedActionTable *popActions = + actionTableMap.find( targ->popTest ); + popTestRa = allActionTables + popActions->id; + } + + + from->nfaTargs->append( RedNfaTarg( rtarg, pushRa, + popTestRa, targ->order ) ); + + MergeSort<RedNfaTarg, RedNfaTargCmp> sort; + sort.sort( from->nfaTargs->data, from->nfaTargs->length() ); + } + } + + curState += 1; + } +} + +void Reducer::makeMachine() +{ + createMachine(); + + /* Action tables. */ + reduceActionTables(); + + makeConditions(); + + /* Start State. */ + setStartState( fsm->startState->alg.stateNum ); + + /* Error state. */ + if ( fsm->errState != 0 ) + setErrorState( fsm->errState->alg.stateNum ); + + makeEntryPoints(); + makeStateList(); + + resolveTargetStates(); +} + +void Reducer::make( const HostLang *hostLang, const HostType *alphType ) +{ + /* Alphabet type. 
*/ + setAlphType( hostLang, alphType->internalName ); + + /* Getkey expression. */ + if ( fsmCtx->getKeyExpr != 0 ) { + getKeyExpr = new GenInlineList; + makeGenInlineList( getKeyExpr, fsmCtx->getKeyExpr ); + } + + /* Access expression. */ + if ( fsmCtx->accessExpr != 0 ) { + accessExpr = new GenInlineList; + makeGenInlineList( accessExpr, fsmCtx->accessExpr ); + } + + /* PrePush expression. */ + if ( fsmCtx->prePushExpr != 0 ) { + GenInlineList *il = new GenInlineList; + makeGenInlineList( il, fsmCtx->prePushExpr->inlineList ); + prePushExpr = new GenInlineExpr( fsmCtx->prePushExpr->loc, il ); + } + + /* PostPop expression. */ + if ( fsmCtx->postPopExpr != 0 ) { + GenInlineList *il = new GenInlineList; + makeGenInlineList( il, fsmCtx->postPopExpr->inlineList ); + postPopExpr = new GenInlineExpr( fsmCtx->postPopExpr->loc, il ); + } + + /* PrePush expression. */ + if ( fsmCtx->nfaPrePushExpr != 0 ) { + GenInlineList *il = new GenInlineList; + makeGenInlineList( il, fsmCtx->nfaPrePushExpr->inlineList ); + nfaPrePushExpr = new GenInlineExpr( fsmCtx->nfaPrePushExpr->loc, il ); + } + + /* PostPop expression. */ + if ( fsmCtx->nfaPostPopExpr != 0 ) { + GenInlineList *il = new GenInlineList; + makeGenInlineList( il, fsmCtx->nfaPostPopExpr->inlineList ); + nfaPostPopExpr = new GenInlineExpr( fsmCtx->nfaPostPopExpr->loc, il ); + } + + + /* + * Variable expressions. 
+ */ + + if ( fsmCtx->pExpr != 0 ) { + pExpr = new GenInlineList; + makeGenInlineList( pExpr, fsmCtx->pExpr ); + } + + if ( fsmCtx->peExpr != 0 ) { + peExpr = new GenInlineList; + makeGenInlineList( peExpr, fsmCtx->peExpr ); + } + + if ( fsmCtx->eofExpr != 0 ) { + eofExpr = new GenInlineList; + makeGenInlineList( eofExpr, fsmCtx->eofExpr ); + } + + if ( fsmCtx->csExpr != 0 ) { + csExpr = new GenInlineList; + makeGenInlineList( csExpr, fsmCtx->csExpr ); + } + + if ( fsmCtx->topExpr != 0 ) { + topExpr = new GenInlineList; + makeGenInlineList( topExpr, fsmCtx->topExpr ); + } + + if ( fsmCtx->stackExpr != 0 ) { + stackExpr = new GenInlineList; + makeGenInlineList( stackExpr, fsmCtx->stackExpr ); + } + + if ( fsmCtx->actExpr != 0 ) { + actExpr = new GenInlineList; + makeGenInlineList( actExpr, fsmCtx->actExpr ); + } + + if ( fsmCtx->tokstartExpr != 0 ) { + tokstartExpr = new GenInlineList; + makeGenInlineList( tokstartExpr, fsmCtx->tokstartExpr ); + } + + if ( fsmCtx->tokendExpr != 0 ) { + tokendExpr = new GenInlineList; + makeGenInlineList( tokendExpr, fsmCtx->tokendExpr ); + } + + if ( fsmCtx->dataExpr != 0 ) { + dataExpr = new GenInlineList; + makeGenInlineList( dataExpr, fsmCtx->dataExpr ); + } + + makeExports(); + makeMachine(); + + /* Do this before distributing transitions out to singles and defaults + * makes life easier. */ + redFsm->maxKey = findMaxKey(); + + redFsm->assignActionLocs(); + + /* Find the first final state (The final state with the lowest id). 
*/ + redFsm->findFirstFinState(); +} +/* Allocate the reduced machine (RedFsmAp) for this machine id. */ +void Reducer::createMachine() +{ + redFsm = new RedFsmAp( fsm->ctx, machineId ); +} +/* Allocate the action array and link each of its elements into actionList. */ +void Reducer::initActionList( unsigned long length ) +{ + allActions = new GenAction[length]; + for ( unsigned long a = 0; a < length; a++ ) + actionList.append( allActions+a ); +} +/* Fill in slot anum of the action array; the action id starts out as the slot index. */ +void Reducer::newAction( int anum, std::string name, + const InputLoc &loc, GenInlineList *inlineList ) +{ + allActions[anum].actionId = anum; + allActions[anum].name = name; + allActions[anum].loc = loc; + allActions[anum].inlineList = inlineList; +} +/* Allocate the array of reduced action tables. */ +void Reducer::initActionTableList( unsigned long length ) +{ + allActionTables = new RedAction[length]; +} +/* Allocate the state array, link the states into the state list and turn the recorded start, error and entry-point offsets into pointers. */ +void Reducer::initStateList( unsigned long length ) +{ + redFsm->allStates = allStates = new RedStateAp[length]; + for ( unsigned long s = 0; s < length; s++ ) + redFsm->stateList.append( allStates+s ); + + /* We get the start state as an offset, set the pointer now. Negative offsets mean "not set" and are skipped. */ + if ( startState >= 0 ) + redFsm->startState = allStates + startState; + if ( errState >= 0 ) + redFsm->errState = allStates + errState; + for ( EntryIdVect::Iter en = entryPointIds; en.lte(); en++ ) + redFsm->entryPoints.insert( allStates + *en ); + + /* The nextStateId is no longer used to assign state ids (they come in set + * from the frontend now), however generation code still depends on it. + * Should eventually remove this variable. */ + redFsm->nextStateId = redFsm->stateList.length(); +} +/* Record the start state as an offset; initStateList() resolves it to a pointer. */ +void Reducer::setStartState( unsigned long _startState ) +{ + startState = _startState; +} +/* Record the error state as an offset; initStateList() resolves it to a pointer. */ +void Reducer::setErrorState( unsigned long _errState ) +{ + errState = _errState; +} +/* Record an entry point: the state offset and the name are appended to two parallel vectors. */ +void Reducer::addEntryPoint( char *name, unsigned long entryState ) +{ + entryPointIds.append( entryState ); + entryPointNames.append( name ); +} +/* Currently a no-op; both arguments are unused. */ +void Reducer::initTransList( int snum, unsigned long length ) +{ + /* Could preallocate the out range to save time growing it. For now do + * nothing.
*/ +} +/* Append the range lowKey..highKey to the state's out range, first filling any gap below it with an error-transition range. */ +void Reducer::newTrans( RedStateAp *state, Key lowKey, Key highKey, RedTransAp *trans ) +{ + /* Get the out range of the given state. */ + RedTransList &destRange = state->outRange; + + /* Reduced machines are complete. We need to fill any gaps with the error + * transitions. */ + if ( destRange.length() == 0 ) { + /* Range is currently empty. */ + if ( keyOps->lt( keyOps->minKey, lowKey ) ) { + /* The first range doesn't start at the low end. */ + Key fillHighKey = lowKey; + keyOps->decrement( fillHighKey ); + + /* Create the filler with the machine's error transition. */ + RedTransEl newTel( fsm->ctx->keyOps->minKey, fillHighKey, + redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + else { + /* The range list is not empty, get the last range. */ + RedTransEl *last = &destRange[destRange.length()-1]; + Key nextKey = last->highKey; + keyOps->increment( nextKey ); + if ( keyOps->lt( nextKey, lowKey ) ) { + /* There is a gap to fill. Make the high key. */ + Key fillHighKey = lowKey; + keyOps->decrement( fillHighKey ); + + /* Create the filler with the machine's error transition. */ + RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + + /* Filler taken care of. Append the range. */ + destRange.append( RedTransEl( lowKey, highKey, trans ) ); +} +/* Complete the out range of state snum by padding its upper end with the error transition. The error state itself is left untouched. */ +void Reducer::finishTransList( int snum ) +{ + /* Get the current state and range. */ + RedStateAp *curState = allStates + snum; + RedTransList &destRange = curState->outRange; + + if ( curState == redFsm->errState ) + return; + + /* We may need filler on the end. */ + /* Check if there are any ranges already. */ + if ( destRange.length() == 0 ) { + /* Fill with the whole alphabet. */ + /* Add the range on the lower and upper bound. */ + RedTransEl newTel( fsm->ctx->keyOps->minKey, + fsm->ctx->keyOps->maxKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + else { + /* Get the last and check for a gap on the end.
*/ + RedTransEl *last = &destRange[destRange.length()-1]; + if ( keyOps->lt( last->highKey, fsm->ctx->keyOps->maxKey ) ) { + /* Make the low key of the filler: one past the last high key. */ + Key fillLowKey = last->highKey; + keyOps->increment( fillLowKey ); + + /* Create the new range with the error trans and append it. */ + RedTransEl newTel( fillLowKey, fsm->ctx->keyOps->maxKey, + redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } +} +/* Set the id of the state at offset snum. */ +void Reducer::setId( int snum, int id ) +{ + RedStateAp *curState = allStates + snum; + curState->id = id; +} +/* Mark the state at offset snum as final. */ +void Reducer::setFinal( int snum ) +{ + RedStateAp *curState = allStates + snum; + curState->isFinal = true; +} + +/* Attach to-state, from-state and EOF action tables to state snum. A negative index leaves the corresponding pointer untouched. */ +void Reducer::setStateActions( int snum, long toStateAction, + long fromStateAction, long eofAction ) +{ + RedStateAp *curState = allStates + snum; + if ( toStateAction >= 0 ) + curState->toStateAction = allActionTables + toStateAction; + if ( fromStateAction >= 0 ) + curState->fromStateAction = allActionTables + fromStateAction; + if ( eofAction >= 0 ) + curState->eofAction = allActionTables + eofAction; +} +/* Give state snum an EOF transition to the state at offset eofTarget; a negative actId means the transition has no action table. */ +void Reducer::setEofTrans( int snum, long eofTarget, long actId ) +{ + RedStateAp *curState = allStates + snum; + RedStateAp *targState = allStates + eofTarget; + RedAction *eofAct = actId >= 0 ?
allActionTables + actId : 0; + + RedTransAp *trans = redFsm->allocateTrans( targState, eofAct ); + curState->eofTrans = trans; +} +/* Conditional variant: the EOF transition of state snum is allocated from a cond space and its out conditions. */ +void Reducer::setEofTrans( int snum, GenCondSpace *condSpace, + RedCondEl *outConds, int numConds, RedCondAp *errCond ) +{ + RedStateAp *curState = allStates + snum; + + RedTransAp *trans = redFsm->allocateTrans( condSpace, outConds, numConds, errCond ); + + curState->eofTrans = trans; +} +/* Turn target state ids into state pointers for the Goto, Call, Ncall, Next and Entry items of the list, recursing into child lists. */ +void Reducer::resolveTargetStates( GenInlineList *inlineList ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case GenInlineItem::Goto: case GenInlineItem::Call: + case GenInlineItem::Ncall: case GenInlineItem::Next: + case GenInlineItem::Entry: + item->targState = allStates + item->targId; + break; + default: + break; + } + + if ( item->children != 0 ) + resolveTargetStates( item->children ); + } +} +/* Resolve the target states in the inline list of every action. The section under #if 0 is compiled out. */ +void Reducer::resolveTargetStates() +{ + for ( GenActionList::Iter a = actionList; a.lte(); a++ ) + resolveTargetStates( a->inlineList ); + +#if 0 + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofAction != 0 ) { + for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) + setLabelsNeeded( item->value->inlineList ); + } + + if ( st->eofTrans != 0 ) { + long condsFullSize = st->eofTrans->condFullSize(); + for ( int c = 0; c < condsFullSize; c++ ) { + RedCondPair *pair = st->eofTrans->outCond( c ); + setLabelsNeeded( pair ); + } + } +#endif +} +/* Report whether the alphabet type named by data is found for the host language. The lookup result is only tested here, nothing is stored. */ +bool Reducer::setAlphType( const HostLang *hostLang, const char *data ) +{ + HostType *alphType = findAlphTypeInternal( hostLang, data ); + if ( alphType == 0 ) + return false; + + return true; +} +/* Append the action at offset condActionId to the cond set of the cond space at offset cnum. */ +void Reducer::condSpaceItem( int cnum, long condActionId ) +{ + GenCondSpace *cond = allCondSpaces + cnum; + cond->condSet.append( allActions + condActionId ); +} +/* Currently a no-op; both arguments are unused. */ +void Reducer::initStateCondList( int snum, ulong length ) +{ + /* Could preallocate these, as we could with transitions.
*/ +} +/* Currently a no-op; all arguments are unused. */ +void Reducer::addStateCond( int snum, Key lowKey, Key highKey, long condNum ) +{ +} +/* Return the largest high key found at the end of any state's out range. The search starts from the alphabet's maxKey, so the result is never smaller than that (NOTE(review): confirm this starting value is intended). */ +Key Reducer::findMaxKey() +{ + Key maxKey = fsm->ctx->keyOps->maxKey; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + assert( st->outSingle.length() == 0 ); + assert( st->defTrans == 0 ); + + long rangeLen = st->outRange.length(); + if ( rangeLen > 0 ) { + Key highKey = st->outRange[rangeLen-1].highKey; + if ( keyOps->gt( highKey, maxKey ) ) + maxKey = highKey; + } + } + return maxKey; +} +/* Count one transition reference on the action table and on every action it contains. */ +void Reducer::actionActionRefs( RedAction *action ) +{ + action->numTransRefs += 1; + for ( GenActionTable::Iter item = action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; +} +/* Count transition references for each condition of trans that carries an action, and for its cond space if it has one. */ +void Reducer::transActionRefs( RedTransAp *trans ) +{ + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + if ( cond->action != 0 ) + actionActionRefs( cond->action ); + } + + if ( trans->condSpace != 0 ) + trans->condSpace->numTransRefs += 1; +} +/* Apply transActionRefs() to the transition of every element in the list. */ +void Reducer::transListActionRefs( RedTransList &list ) +{ + for ( RedTransList::Iter rtel = list; rtel.lte(); rtel++ ) + transActionRefs( rtel->value ); +} +/* Walk all states and count, per kind of use, how often each action table and each action is referenced. */ +void Reducer::findFinalActionRefs() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Reference count out of single transitions. */ + transListActionRefs( st->outSingle ); + + /* Reference count out of range transitions. */ + transListActionRefs( st->outRange ); + + /* Reference count default transition. */ + if ( st->defTrans != 0 ) + transActionRefs( st->defTrans ); + + /* Reference count EOF transitions. */ + if ( st->eofTrans != 0 ) + transActionRefs( st->eofTrans ); + + /* Reference count to state actions. */ + if ( st->toStateAction != 0 ) { + st->toStateAction->numToStateRefs += 1; + for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) + item->value->numToStateRefs += 1; + } + + /* Reference count from state actions.
*/ + if ( st->fromStateAction != 0 ) { + st->fromStateAction->numFromStateRefs += 1; + for ( GenActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ ) + item->value->numFromStateRefs += 1; + } + + /* Reference count EOF actions. */ + if ( st->eofAction != 0 ) { + st->eofAction->numEofRefs += 1; + for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) + item->value->numEofRefs += 1; + } + /* Reference count the push and pop-test action tables of the state's NFA targets. */ + if ( st->nfaTargs != 0 ) { + for ( RedNfaTargs::Iter nt = *st->nfaTargs; nt.lte(); nt++ ) { + + if ( nt->push != 0 ) { + nt->push->numNfaPushRefs += 1; + for ( GenActionTable::Iter item = nt->push->key; item.lte(); item++ ) + item->value->numNfaPushRefs += 1; + } + + if ( nt->popTest != 0 ) { + nt->popTest->numNfaPopTestRefs += 1; + for ( GenActionTable::Iter item = nt->popTest->key; item.lte(); item++ ) + item->value->numNfaPopTestRefs += 1; + } + } + } + } +} +/* Scan the inline items of one action, recursing into children, and set the machine-wide flags on redFsm that record which kinds of statements the actions use. */ +void Reducer::analyzeAction( GenAction *act, GenInlineList *inlineList ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* Only consider actions that are referenced. */ + if ( act->numRefs() > 0 ) { + if ( item->type == GenInlineItem::Goto || item->type == GenInlineItem::GotoExpr ) + { + redFsm->bAnyActionGotos = true; + } + else if ( item->type == GenInlineItem::Call || item->type == GenInlineItem::CallExpr ) { + redFsm->bAnyActionCalls = true; + } + else if ( item->type == GenInlineItem::Ncall || item->type == GenInlineItem::NcallExpr ) { + redFsm->bAnyActionCalls = true; /* ncalls set the same flag as calls */ + } + else if ( item->type == GenInlineItem::Ret ) + redFsm->bAnyActionRets = true; + else if ( item->type == GenInlineItem::Nret ) + redFsm->bAnyActionNrets = true; + else if ( item->type == GenInlineItem::LmInitAct || + item->type == GenInlineItem::LmSetActId || + item->type == GenInlineItem::LmSwitch ) + { + redFsm->bUsingAct = true; + } + + /* Any by value control in all actions?
*/ + if ( item->type == GenInlineItem::CallExpr || item->type == GenInlineItem::GotoExpr ) + redFsm->bAnyActionByValControl = true; + } + + /* Check for various things in regular actions: those referenced from transitions, to-state actions or from-state actions. */ + if ( act->numTransRefs > 0 || act->numToStateRefs > 0 || act->numFromStateRefs > 0 ) { + /* Any returns in regular actions? */ + if ( item->type == GenInlineItem::Ret || item->type == GenInlineItem::Nret ) + redFsm->bAnyRegActionRets = true; + + /* Any next statements in the regular actions? */ + if ( item->type == GenInlineItem::Next || item->type == GenInlineItem::NextExpr || + item->type == GenInlineItem::Ncall || item->type == GenInlineItem::NcallExpr || + item->type == GenInlineItem::Nret ) + redFsm->bAnyRegNextStmt = true; + + /* Any by value control in regular actions? */ + if ( item->type == GenInlineItem::CallExpr || item->type == GenInlineItem::GotoExpr ) + redFsm->bAnyRegActionByValControl = true; + + /* Any references to the current state in regular actions? */ + if ( item->type == GenInlineItem::Curs ) + redFsm->bAnyRegCurStateRef = true; + /* Any break statements in regular actions? */ + if ( item->type == GenInlineItem::Break ) + redFsm->bAnyRegBreak = true; + /* Any nbreak statements in regular actions? */ + if ( item->type == GenInlineItem::Nbreak ) + redFsm->bAnyRegNbreak = true; + } + /* Nested inline lists are analyzed against the same action. */ + if ( item->children != 0 ) + analyzeAction( act, item->children ); + } +} +/* Scan the inline items of one action belonging to a reduced action table, recursing into children, and set the per-table flags on redAct. */ +void Reducer::analyzeActionList( RedAction *redAct, GenInlineList *inlineList ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* Any next statements in the action table? */ + if ( item->type == GenInlineItem::Next || item->type == GenInlineItem::NextExpr || + item->type == GenInlineItem::Ncall || item->type == GenInlineItem::NcallExpr || + item->type == GenInlineItem::Nret ) + redAct->bAnyNextStmt = true; + + /* Any references to the current state.
*/ + if ( item->type == GenInlineItem::Curs ) + redAct->bAnyCurStateRef = true; + /* Any break statements in the action table? */ + if ( item->type == GenInlineItem::Break ) + redAct->bAnyBreakStmt = true; + /* Count NFA references on the cond space used by the item. */ + if ( item->type == GenInlineItem::NfaWrapConds ) + item->condSpace->numNfaRefs += 1; + + if ( item->children != 0 ) + analyzeActionList( redAct, item->children ); + } +} + +/* Assign ids to referenced actions. Ids are consecutive from zero in action list order; unreferenced actions keep their old id. */ +void Reducer::assignActionIds() +{ + int nextActionId = 0; + for ( GenActionList::Iter act = actionList; act.lte(); act++ ) { + /* Only ever interested in referenced actions. */ + if ( act->numRefs() > 0 ) + act->actionId = nextActionId++; + } +} +/* Reset and recompute the maximum values stored on redFsm (lengths, offsets, ids, spans). */ +void Reducer::setValueLimits() +{ + redFsm->maxSingleLen = 0; + redFsm->maxRangeLen = 0; + redFsm->maxKeyOffset = 0; + redFsm->maxIndexOffset = 0; + redFsm->maxActListId = 0; + redFsm->maxActionLoc = 0; + redFsm->maxActArrItem = 0; + redFsm->maxSpan = 0; + redFsm->maxFlatIndexOffset = 0; + redFsm->maxCondSpaceId = 0; + + /* In both of these cases the 0 index is reserved for no value, so the max + * is one more than it would be if they started at 0. */ + redFsm->maxIndex = redFsm->transSet.length(); + redFsm->maxCond = condSpaceList.length(); + + /* The nextStateId - 1 is the last state id assigned. */ + redFsm->maxState = redFsm->nextStateId - 1; + + for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) { + if ( csi->condSpaceId > redFsm->maxCondSpaceId ) + redFsm->maxCondSpaceId = csi->condSpaceId; + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Maximum single length. */ + if ( st->outSingle.length() > redFsm->maxSingleLen ) + redFsm->maxSingleLen = st->outSingle.length(); + + /* Maximum range length. */ + if ( st->outRange.length() > redFsm->maxRangeLen ) + redFsm->maxRangeLen = st->outRange.length(); + + /* The key offset and index offset for the state after the last are not used, so the last state is skipped. */ + if ( !
st.last() ) { + redFsm->maxKeyOffset += st->outSingle.length() + st->outRange.length()*2; + redFsm->maxIndexOffset += st->outSingle.length() + st->outRange.length() + 2; + } + + /* Max key span, taken only over states that have a transList. */ + if ( st->transList != 0 ) { + unsigned long long span = fsm->ctx->keyOps->span( st->lowKey, st->highKey ); + if ( span > redFsm->maxSpan ) + redFsm->maxSpan = span; + } + + /* Max flat index offset. Again the last state does not contribute. */ + if ( ! st.last() ) { + if ( st->transList != 0 ) + redFsm->maxFlatIndexOffset += fsm->ctx->keyOps->span( st->lowKey, st->highKey ); + redFsm->maxFlatIndexOffset += 1; + } + } + + for ( GenActionTableMap::Iter at = redFsm->actionMap; at.lte(); at++ ) { + /* Maximum id of action lists. */ + if ( at->actListId+1 > redFsm->maxActListId ) + redFsm->maxActListId = at->actListId+1; + + /* Maximum location of items in action array. */ + if ( at->location+1 > redFsm->maxActionLoc ) + redFsm->maxActionLoc = at->location+1; + + /* Maximum values going into the action array: both the table lengths and the action ids are considered. */ + if ( at->key.length() > redFsm->maxActArrItem ) + redFsm->maxActArrItem = at->key.length(); + for ( GenActionTable::Iter item = at->key; item.lte(); item++ ) { + if ( item->value->actionId > redFsm->maxActArrItem ) + redFsm->maxActArrItem = item->value->actionId; + } + } +} + +/* Gather various info on the machine. */ +void Reducer::analyzeMachine() +{ + /* Find the true count of action references. */ + findFinalActionRefs(); + + /* Classify every action by how it is referenced and scan its code. */ + for ( GenActionList::Iter act = actionList; act.lte(); act++ ) { + /* Record the occurrence of various kinds of actions.
*/ + if ( act->numToStateRefs > 0 ) + redFsm->bAnyToStateActions = true; + if ( act->numFromStateRefs > 0 ) + redFsm->bAnyFromStateActions = true; + if ( act->numEofRefs > 0 ) + redFsm->bAnyEofActions = true; + if ( act->numTransRefs > 0 ) + redFsm->bAnyRegActions = true; + + if ( act->numNfaPushRefs > 0 ) { + redFsm->bAnyNfaPushPops = true; + redFsm->bAnyNfaPushes = true; + } + /* NOTE(review): numNfaPopActionRefs is tested here, but findFinalActionRefs() only increments the push and pop-test counters - confirm where it is counted. */ + if ( act->numNfaPopActionRefs > 0 ) { + redFsm->bAnyNfaPushPops = true; + redFsm->bAnyNfaPops = true; + } + + if ( act->numNfaPopTestRefs > 0 ) { + redFsm->bAnyNfaPushPops = true; + redFsm->bAnyNfaPops = true; + } + + /* Recurse through the action's parse tree looking for various things. */ + analyzeAction( act, act->inlineList ); + } + + /* Analyze reduced action lists. Actions without an inline list are skipped. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + for ( GenActionTable::Iter act = redAct->key; act.lte(); act++ ) + if ( act->value->inlineList != 0 ) + analyzeActionList( redAct, act->value->inlineList ); + } + /* Note whether any state has NFA targets. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) + redFsm->bAnyNfaStates = true; + } + + /* Find states that have transitions with actions that reference the + * current state. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Check any actions out of outSingle. */ + for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + if ( cond->action != 0 && cond->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + } + + /* Check any actions out of outRange.
*/ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + if ( cond->action != 0 && cond->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + } + + /* Check any action out of default. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + if ( cond->action != 0 && cond->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + } + /* Note whether any state has an EOF transition. */ + if ( st->eofTrans != 0 ) + redFsm->bAnyEofTrans = true; + } + /* Note whether any cond space is referenced from transitions or from NFA code. */ + for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) { + GenCondSpace *condSpace = csi; + + if ( condSpace->numTransRefs > 0 ) + redFsm->bAnyTransCondRefs = true; + + if ( condSpace->numNfaRefs > 0 ) + redFsm->bAnyNfaCondRefs = true; + } + + /* Assign ids to actions that are referenced. */ + assignActionIds(); + + /* Set the maximums of various values used for deciding types. */ + setValueLimits(); +} +/* Emit a line directive for the next output line. Nothing is written unless the stream's buffer is an output_filter, which supplies the line number and file name. */ +void CodeGenData::genOutputLineDirective( std::ostream &out ) const +{ + std::streambuf *sbuf = out.rdbuf(); + output_filter *filter = dynamic_cast<output_filter*>(sbuf); + if ( filter != 0 ) + (*genLineDirective)( out, lineDirectives, filter->line + 1, filter->fileName ); +} +/* Report an unrecognized write option as a warning. */ +void CodeGenData::write_option_error( InputLoc &loc, std::string arg ) +{ + red->id->warning(loc) << "unrecognized write option \"" << arg << "\"" << std::endl; +} +/* Free the reduced machine, empty the context's action list, delete the fsm graph and mark this generator as cleared. */ +void CodeGenData::writeClear() +{ + clear(); + + /* Delete all the nodes in the action list. Will cause all the + * string data that represents the actions to be deallocated. */ + red->fsm->ctx->actionList.empty(); + + delete red->fsm; + red->fsm = 0; + + // red->pd->graphDict.empty(); + + cleared = true; +} + +void CodeGenData::collectReferences() +{ + /* Do this once only.
*/ + if ( !referencesCollected ) { + referencesCollected = true; + + /* Nullify the output and execute the write. We use this pass to collect references. */ + nullbuf nb; + std::streambuf *filt = out.rdbuf( &nb ); + writeExec(); + + /* Restore the output for whatever writing comes next. */ + out.rdbuf( filt ); + } +} + +void CodeGenData::writeStatement( InputLoc &loc, int nargs, + std::vector<std::string> &args, bool generateDot, const HostLang *hostLang ) +{ + /* Start write generation on a fresh line. */ + out << '\n'; + + if ( cleared ) { + red->id->error(loc) << "write statement following a clear is invalid" << std::endl; + return; + } + + genOutputLineDirective( out ); + + if ( args[0] == "data" ) { + for ( int i = 1; i < nargs; i++ ) { + if ( args[i] == "noerror" ) + noError = true; + else if ( args[i] == "noprefix" ) + noPrefix = true; + else if ( args[i] == "nofinal" ) + noFinal = true; + else + write_option_error( loc, args[i] ); + } + + if ( red->id->printStatistics ) { + red->id->stats() << "fsm-name\t" << fsmName << std::endl; + red->id->stats() << "fsm-states\t" << redFsm->stateList.length() << std::endl; + } + + collectReferences(); + writeData(); + statsSummary(); + } + else if ( args[0] == "init" ) { + for ( int i = 1; i < nargs; i++ ) { + if ( args[i] == "nocs" ) + noCS = true; + else + write_option_error( loc, args[i] ); + } + writeInit(); + } + else if ( args[0] == "exec" ) { + for ( int i = 1; i < nargs; i++ ) { + if ( args[i] == "noend" ) + noEnd = true; + else + write_option_error( loc, args[i] ); + } + collectReferences(); + writeExec(); + } + else if ( args[0] == "exports" ) { + for ( int i = 1; i < nargs; i++ ) + write_option_error( loc, args[i] ); + writeExports(); + } + else if ( args[0] == "start" ) { + for ( int i = 1; i < nargs; i++ ) + write_option_error( loc, args[i] ); + writeStart(); + } + else if ( args[0] == "first_final" ) { + for ( int i = 1; i < nargs; i++ ) + write_option_error( loc, args[i] ); + writeFirstFinal(); + } + else 
if ( args[0] == "error" ) { + for ( int i = 1; i < nargs; i++ ) + write_option_error( loc, args[i] ); + writeError(); + } + else if ( args[0] == "clear" ) { + for ( int i = 1; i < nargs; i++ ) + write_option_error( loc, args[i] ); + writeClear(); + } + else { + /* Anything else is not a write command: report it as an error. */ + red->id->error(loc) << "unrecognized write command \"" << + args[0] << "\"" << std::endl; + } +} diff --git a/ragel/gendata.h b/ragel/gendata.h new file mode 100644 index 00000000..f34f2629 --- /dev/null +++ b/ragel/gendata.h @@ -0,0 +1,477 @@ +/* + * Copyright 2005-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _GENDATA_H /* include guard */ +#define _GENDATA_H + +#include <iostream> +#include <string> +#include <vector> +#include "config.h" +#include "redfsm.h" +#include "common.h" +#include "fsmgraph.h" + +/* Forwards.
*/ +struct TransAp; +struct FsmAp; +struct PdBase; +struct InputData; +struct FsmGbl; +struct GenInlineList; +struct InlineItem; +/* AVL tree element holding a copy of an action table together with an integer id. */ +struct RedActionTable +: + public AvlTreeEl<RedActionTable> +{ + RedActionTable( const ActionTable &key ) + : + key(key), + id(0) + { } + + const ActionTable &getKey() + { return key; } + + ActionTable key; + int id; +}; + +typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap; +/* Cursor over a chain of TransAp that caches the keys and the next pointer of the current element. NOTE(review): when trans is null, load() assigns nothing, so next, lowKey and highKey keep whatever they held before - presumably callers test trans first; confirm. */ +struct NextRedTrans +{ + Key lowKey, highKey; + TransAp *trans; + TransAp *next; + + void load() { + if ( trans != 0 ) { + next = trans->next; + lowKey = trans->lowKey; + highKey = trans->highKey; + } + } + + NextRedTrans( TransAp *t ) { + trans = t; + load(); + } + + void increment() { + trans = next; + load(); + } +}; +/* State shared by reducers: the global data, the fsm and its context, the machine name and id, the key operations and the action table map. */ +struct RedBase +{ + RedBase( FsmGbl *id, FsmCtx *fsmCtx, FsmAp *fsm, std::string fsmName, int machineId ) + : + id(id), + fsmCtx(fsmCtx), + fsm(fsm), + fsmName(fsmName), + machineId(machineId), + keyOps(fsm->ctx->keyOps), /* taken from the fsm's own context, not from the fsmCtx argument */ + nextActionTableId(0) + { + } + + FsmGbl *id; + FsmCtx *fsmCtx; + FsmAp *fsm; + std::string fsmName; + int machineId; + + KeyOps *keyOps; + + ActionTableMap actionTableMap; + int nextActionTableId; +}; + +struct NameInst; +typedef DList<GenAction> GenActionList; + +typedef unsigned long ulong; + +void openHostBlock( char opener, InputData *id, std::ostream &out, const char *fileName, int line ); +/* NOTE(review): string is unqualified here; this presumably relies on a using-declaration from an included header - confirm. */ +string itoa( int i ); +/* Creates the reduced machine (RedFsmAp) and collects the data about it that the code generators read. */ +struct Reducer + : public RedBase +{ + Reducer( FsmGbl *id, FsmCtx *fsmCtx, FsmAp *fsm, std::string fsmName, int machineId ) + : + RedBase( id, fsmCtx, fsm, fsmName, machineId ), + redFsm(0), + allActions(0), + allActionTables(0), + allConditions(0), + allCondSpaces(0), + allStates(0), + nameIndex(0), + startState(-1), + errState(-1), + getKeyExpr(0), + accessExpr(0), + prePushExpr(0), + postPopExpr(0), + nfaPrePushExpr(0), + nfaPostPopExpr(0), + pExpr(0), + peExpr(0), + eofExpr(0), + csExpr(0), + topExpr(0), + stackExpr(0), + actExpr(0), + tokstartExpr(0), + tokendExpr(0), +
dataExpr(0), + hasLongestMatch(false) + { + } + /* Release the reduced machine, the allocated arrays and every inline expression held by this object. */ + ~Reducer() + { + if ( redFsm != 0 ) + delete redFsm; + + delete[] allActions; + delete[] allActionTables; + delete[] allConditions; + delete[] allCondSpaces; + /* NOTE(review): allStates is not released here; initStateList() also stores it in redFsm->allStates, so presumably RedFsmAp owns it - confirm. */ + actionTableMap.empty(); + + if ( getKeyExpr != 0 ) + delete getKeyExpr; + if ( accessExpr != 0 ) + delete accessExpr; + if ( prePushExpr != 0 ) + delete prePushExpr; + if ( postPopExpr != 0 ) + delete postPopExpr; + if ( nfaPrePushExpr != 0 ) + delete nfaPrePushExpr; + if ( nfaPostPopExpr != 0 ) + delete nfaPostPopExpr; + if ( pExpr != 0 ) + delete pExpr; + if ( peExpr != 0 ) + delete peExpr; + if ( eofExpr != 0 ) + delete eofExpr; + if ( csExpr != 0 ) + delete csExpr; + if ( topExpr != 0 ) + delete topExpr; + if ( stackExpr != 0 ) + delete stackExpr; + if ( actExpr != 0 ) + delete actExpr; + if ( tokstartExpr != 0 ) + delete tokstartExpr; + if ( tokendExpr != 0 ) + delete tokendExpr; + if ( dataExpr != 0 ) + delete dataExpr; + } + +protected: + /* Collected during parsing. These three are not set by the constructor's initializer list. */ + int curAction; + int curActionTable; + int curState; + /* Builders that append generated inline items to outList. */ + void makeKey( GenInlineList *outList, Key key ); + void makeText( GenInlineList *outList, InlineItem *item ); + void makeLmOnLast( GenInlineList *outList, InlineItem *item ); + void makeLmOnNext( GenInlineList *outList, InlineItem *item ); + void makeLmOnLagBehind( GenInlineList *outList, InlineItem *item ); + void makeLmSwitch( GenInlineList *outList, InlineItem *item ); + void makeLmNfaOnLast( GenInlineList *outList, InlineItem *item ); + void makeLmNfaOnNext( GenInlineList *outList, InlineItem *item ); + void makeLmNfaOnEof( GenInlineList *outList, InlineItem *item ); + void makeActionExec( GenInlineList *outList, InlineItem *item ); + void makeSetTokend( GenInlineList *outList, long offset ); + void makeSetAct( GenInlineList *outList, long lmId ); + void makeSubList( GenInlineList *outList, InlineList *inlineList, + GenInlineItem::Type type ); + void makeTargetItem( GenInlineList *outList, NameInst *nameTarg, +
GenInlineItem::Type type ); + void makeExecGetTokend( GenInlineList *outList ); + void makeActionList(); + void makeAction( Action *action ); + void makeActionTableList(); + void makeConditions(); + void makeEntryPoints(); + bool makeNameInst( std::string &out, NameInst *nameInst ); + void makeStateList(); + /* Per-state construction from the StateAp graph. */ + void makeStateActions( StateAp *state ); + void makeEofTrans( StateAp *state ); + void makeTransList( StateAp *state ); + void makeTrans( Key lowKey, Key highKey, TransAp *trans ); + void newTrans( RedStateAp *state, Key lowKey, Key highKey, RedTransAp *trans ); + + void makeSubList( GenInlineList *outList, const InputLoc &loc, + InlineList *inlineList, GenInlineItem::Type type ); + /* Allocation and filling of the reduced machine's arrays. The snum, anum and cnum arguments are offsets into those arrays. */ + void createMachine(); + void initActionList( unsigned long length ); + void newAction( int anum, std::string name, + const InputLoc &loc, GenInlineList *inlineList ); + void initActionTableList( unsigned long length ); + void initStateList( unsigned long length ); + void setStartState( unsigned long startState ); + void setErrorState( unsigned long errState ); + void addEntryPoint( char *name, unsigned long entryState ); + void setId( int snum, int id ); + void setFinal( int snum ); + void initTransList( int snum, unsigned long length ); + + void newTrans( int snum, int tnum, Key lowKey, Key highKey, + GenCondSpace *gcs, RedTransAp *trans ); + + void finishTransList( int snum ); + void setStateActions( int snum, long toStateAction, + long fromStateAction, long eofAction ); + void setEofTrans( int snum, long targ, long eofAction ); + void setEofTrans( int snum, GenCondSpace *condSpace, + RedCondEl *outConds, int numConds, RedCondAp *errCond ); + void setForcedErrorState() + { redFsm->forcedErrorState = true; } + /* Cond spaces. */ + void condSpaceItem( int cnum, long condActionId ); + void newCondSpace( int cnum, int condSpaceId ); + + void initStateCondList( int snum, ulong length ); + void addStateCond( int snum, Key lowKey, Key highKey, long condNum ); + + /* Conversion of target state ids into state pointers. */ + void resolveTargetStates(
GenInlineList *inlineList ); + void resolveTargetStates(); + + + /* Gather various info on the machine. */ + void analyzeActionList( RedAction *redAct, GenInlineList *inlineList ); + void analyzeAction( GenAction *act, GenInlineList *inlineList ); + void actionActionRefs( RedAction *action ); + void transListActionRefs( RedTransList &list ); + void transActionRefs( RedTransAp *trans ); + void findFinalActionRefs(); + + void setValueLimits(); + void assignActionIds(); + + + void appendTrans( TransListVect &outList, Key lowKey, Key highKey, TransAp *trans ); + void reduceActionTables(); + +public: + /* Public operations on the reducer. */ + Key findMaxKey(); + void makeMachine(); + void makeExports(); + void makeGenInlineList( GenInlineList *outList, InlineList *inList ); + bool setAlphType( const HostLang *hostLang, const char *data ); + void analyzeMachine(); + void make( const HostLang *hostLang, const HostType *alphType ); + + /* + * Collecting the machine. + */ + + RedFsmAp *redFsm; + GenAction *allActions; + RedAction *allActionTables; + Condition *allConditions; + GenCondSpace *allCondSpaces; + RedStateAp *allStates; + NameInst **nameIndex; + int startState; /* offset into allStates; -1 until set */ + int errState; /* offset into allStates; -1 until set */ + GenActionList actionList; + CondSpaceList condSpaceList; + /* Generated inline expressions; each stays null unless the fsm context supplies one. */ + GenInlineList *getKeyExpr; + GenInlineList *accessExpr; + GenInlineExpr *prePushExpr; + GenInlineExpr *postPopExpr; + + GenInlineExpr *nfaPrePushExpr; + GenInlineExpr *nfaPostPopExpr; + + /* Overriding variables.
*/ + GenInlineList *pExpr; + GenInlineList *peExpr; + GenInlineList *eofExpr; + GenInlineList *csExpr; + GenInlineList *topExpr; + GenInlineList *stackExpr; + GenInlineList *actExpr; + GenInlineList *tokstartExpr; + GenInlineList *tokendExpr; + GenInlineList *dataExpr; + /* Entry point offsets and names, kept as parallel vectors. */ + EntryIdVect entryPointIds; + EntryNameVect entryPointNames; + bool hasLongestMatch; + ExportList exportList; + Action *curInlineAction; /* not set by the constructor's initializer list */ +}; +/* Bundle of arguments passed to the CodeGenData constructor and to the makeCodeGen factories. */ +struct CodeGenArgs +{ + CodeGenArgs( FsmGbl *id, Reducer *red, HostType *alphType, + int machineId, std::string sourceFileName, + std::string fsmName, std::ostream &out, + CodeStyle codeStyle ) + : + id(id), + red(red), + alphType(alphType), + machineId(machineId), + sourceFileName(sourceFileName), + fsmName(fsmName), + out(out), + codeStyle(codeStyle), + lineDirectives(true), + forceVar(false), + loopLabels(false) + {} + /* NOTE(review): genLineDirective below is not set by the constructor; CodeGenData reads the directive function from id->hostLang instead. */ + FsmGbl *id; + Reducer *red; + HostType *alphType; + int machineId; + std::string sourceFileName; + std::string fsmName; + std::ostream &out; + CodeStyle codeStyle; + bool lineDirectives; + GenLineDirectiveT genLineDirective; + bool forceVar; + bool loopLabels; +}; +/* Base of the code generators: holds the reducer, a cached pointer to its reduced machine, the output stream and the write options. */ +struct CodeGenData +{ + CodeGenData( const CodeGenArgs &args ) + : + red(args.red), + redFsm(args.red->redFsm), /* copy of the pointer as it is at construction time */ + sourceFileName(args.sourceFileName), + fsmName(args.fsmName), + keyOps(red->keyOps), + alphType(args.alphType), + out(args.out), + noEnd(false), + noPrefix(false), + noFinal(false), + noError(false), + noCS(false), + lineDirectives(args.lineDirectives), + cleared(false), + referencesCollected(false), + genLineDirective(args.id->hostLang->genLineDirective) + { + } + + /* + * The interface to the code generator. + */ + virtual void genAnalysis() = 0; + + /* These are invoked by writeStatement and are normally what are used to + * implement the code generators.
*/ + virtual void writeData() {}; + virtual void writeInit() {}; + virtual void writeExec() {}; + virtual void writeExports() {}; + virtual void writeStart() {}; + virtual void writeFirstFinal() {}; + virtual void writeError() {}; + virtual void writeClear(); + + /* Show some stats after a write data. */ + virtual void statsSummary() = 0; + + /* This can also be overridden to modify the processing of write + * statements. */ + virtual void writeStatement( InputLoc &loc, int nargs, + std::vector<std::string> &args, bool generateDot, const HostLang *hostLang ); + + /********************/ + + virtual ~CodeGenData() + { + } + + void clear() + { + delete red->redFsm; + red->redFsm = 0; + } + + void collectReferences(); + +protected: + + Reducer *red; + RedFsmAp *redFsm; + std::string sourceFileName; + std::string fsmName; + KeyOps *keyOps; + HostType *alphType; + ostream &out; + + /* Write options. */ + bool noEnd; + bool noPrefix; + bool noFinal; + bool noError; + bool noCS; + + void write_option_error( InputLoc &loc, std::string arg ); + + bool lineDirectives; + bool cleared; + + bool referencesCollected; + + void genOutputLineDirective( std::ostream &out ) const; + GenLineDirectiveT genLineDirective; +}; + +/* Selects and constructs the codegen based on the output options. 
*/ +CodeGenData *makeCodeGen( const HostLang *hostLang, const CodeGenArgs &args ); +CodeGenData *asm_makeCodeGen( const HostLang *hostLang, const CodeGenArgs &args ); + +typedef AvlMap<char *, CodeGenData*, CmpStr> CodeGenMap; +typedef AvlMapEl<char *, CodeGenData*> CodeGenMapEl; + +#endif diff --git a/ragel/goto.cc b/ragel/goto.cc new file mode 100644 index 00000000..d3ca58f1 --- /dev/null +++ b/ragel/goto.cc @@ -0,0 +1,978 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ragel.h" +#include "goto.h" +#include "redfsm.h" +#include "bstmap.h" +#include "gendata.h" + +#include <sstream> + +using std::ostringstream; + +IpLabel *Goto::allocateLabels( IpLabel *labels, IpLabel::Type type, int n ) +{ + if ( labels == 0 ) { + labels = new IpLabel[n]; + for ( int id = 0; id < n; id++ ) { + labels[id].type = type; + labels[id].stid = id; + } + } + + return labels; +} + +void Goto::setTableState( TableArray::State state ) +{ + for ( ArrayVector::Iter i = arrayVector; i.lte(); i++ ) { + TableArray *tableArray = *i; + tableArray->setState( state ); + } +} + +/* Emit the goto to take for a given transition. */ +std::ostream &Goto::COND_GOTO( RedCondPair *cond ) +{ + out << "goto " << ctrLabel[cond->id].reference() << ";"; + return out; +} + +/* Emit the goto to take for a given transition. */ +std::ostream &Goto::TRANS_GOTO( RedTransAp *trans ) +{ + if ( trans->condSpace == 0 || trans->condSpace->condSet.length() == 0 ) { + /* Existing. */ + assert( trans->numConds() == 1 ); + RedCondPair *cond = trans->outCond( 0 ); + + /* Go to the transition which will go to the state. */ + out << "goto " << ctrLabel[cond->id].reference() << ";"; + } + else { + out << ck << " = 0;\n"; + for ( GenCondSet::Iter csi = trans->condSpace->condSet; csi.lte(); csi++ ) { + out << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = (1 << csi.pos()); + out << " )\n" << ck << " += " << condValOffset << ";\n"; + } + CondKey lower = 0; + CondKey upper = trans->condFullSize() - 1; + COND_B_SEARCH( trans, lower, upper, 0, trans->numConds()-1 ); + + if ( trans->errCond() != 0 ) { + COND_GOTO( trans->errCond() ) << "\n"; + } + } + + return out; +} + +/* Write out the array of actions. */ +void Goto::taActions() +{ + actions.start(); + + actions.value( 0 ); + + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. 
*/ + actions.value( act->key.length() ); + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) + actions.value( item->value->actionId ); + } + + actions.finish(); +} + +void Goto::GOTO_HEADER( RedStateAp *state ) +{ + /* Label the state. */ + out << "case " << state->id << ":\n"; +} + + +void Goto::SINGLE_SWITCH( RedStateAp *state ) +{ + /* Load up the singles. */ + int numSingles = state->outSingle.length(); + RedTransEl *data = state->outSingle.data; + + if ( numSingles == 1 ) { + /* If there is a single single key then write it out as an if. */ + out << "if ( " << GET_KEY() << " == " << + KEY(data[0].lowKey) << " ) {\n"; + + /* Virtual function for writing the target of the transition. */ + TRANS_GOTO(data[0].value) << "\n"; + out << "}\n"; + } + else if ( numSingles > 1 ) { + /* Write out single keys in a switch if there is more than one. */ + out << "switch( " << GET_KEY() << " ) {\n"; + + /* Write out the single indices. */ + for ( int j = 0; j < numSingles; j++ ) { + out << "case " << KEY(data[j].lowKey) << ": {\n"; + TRANS_GOTO(data[j].value) << "\n"; + out << "}\n"; + } + + /* Close off the transition switch. */ + out << "}\n"; + } +} + +void Goto::RANGE_B_SEARCH( RedStateAp *state, Key lower, Key upper, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; + RedTransEl *data = state->outRange.data; + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet. */ + bool limitLow = keyOps->eq( data[mid].lowKey, lower ); + bool limitHigh = keyOps->eq( data[mid].highKey, upper ); + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid. 
*/ + out << "if ( " << GET_KEY() << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + RANGE_B_SEARCH( state, lower, keyOps->sub( data[mid].lowKey, 1 ), low, mid-1 ); + out << "} else if ( " << GET_KEY() << " > " << + KEY(data[mid].highKey) << " ) {\n"; + RANGE_B_SEARCH( state, keyOps->add( data[mid].highKey, 1 ), upper, mid+1, high ); + out << "} else {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. */ + out << "if ( " << GET_KEY() << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + RANGE_B_SEARCH( state, lower, keyOps->sub( data[mid].lowKey, 1 ), low, mid-1 ); + + /* if the higher is the highest in the alphabet then there is no + * sense testing it. */ + if ( limitHigh ) { + out << "} else {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + else { + out << "} else if ( " << GET_KEY() << " <= " << + KEY(data[mid].highKey) << " ) {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. */ + out << "if ( " << GET_KEY() << " > " << + KEY(data[mid].highKey) << " ) {\n"; + RANGE_B_SEARCH( state, keyOps->add( data[mid].highKey, 1 ), upper, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( limitLow ) { + out << "} else {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + else { + out << "} else if ( " << GET_KEY() << " >= " << + KEY(data[mid].lowKey) << " ) {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. 
*/ + if ( !limitLow && !limitHigh ) { + out << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_KEY() << " && " << GET_KEY() << " <= " << + KEY(data[mid].highKey) << " ) {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + else if ( limitLow && !limitHigh ) { + out << "if ( " << GET_KEY() << " <= " << + KEY(data[mid].highKey) << " ) {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + else if ( !limitLow && limitHigh ) { + out << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_KEY() << " ) {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + out << "{\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + } +} + +/* Write out a key from the fsm code gen. Depends on whether or not the key is + * signed. */ +string Goto::CKEY( CondKey key ) +{ + ostringstream ret; + ret << key.getVal(); + return ret.str(); +} + +void Goto::COND_B_SEARCH( RedTransAp *trans, CondKey lower, + CondKey upper, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; +// RedCondEl *data = trans->outCond(0); + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + CondKey midKey = trans->outCondKey( mid ); + RedCondPair *midTrans = trans->outCond( mid ); + + /* Determine if the keys at mid are the limits of the alphabet. */ + bool limitLow = midKey == lower; + bool limitHigh = midKey == upper; + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid. 
*/ + out << "if ( " << ck << " < " << + CKEY(midKey) << " ) {\n"; + COND_B_SEARCH( trans, lower, midKey-1, low, mid-1 ); + out << "} else if ( " << ck << " > " << + CKEY(midKey) << " ) {\n"; + COND_B_SEARCH( trans, midKey+1, upper, mid+1, high ); + out << "} else {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. */ + out << "if ( " << ck << " < " << + CKEY(midKey) << " ) {\n"; + COND_B_SEARCH( trans, lower, midKey-1, low, mid-1); + + /* if the higher is the highest in the alphabet then there is no + * sense testing it. */ + if ( limitHigh ) { + out << "} else {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + else { + out << "} else if ( " << ck << " <= " << + CKEY(midKey) << " ) {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. */ + out << "if ( " << ck << " > " << + CKEY(midKey) << " ) {\n"; + COND_B_SEARCH( trans, midKey+1, upper, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( limitLow ) { + out << "} else {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + else { + out << "} else if ( " << ck << " >= " << + CKEY(midKey) << " ) {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. 
*/ + if ( !limitLow && !limitHigh ) { + out << "if ( " << ck << " == " << + CKEY(midKey) << " ) {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + else if ( limitLow && !limitHigh ) { + out << "if ( " << ck << " <= " << + CKEY(midKey) << " ) {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + else if ( !limitLow && limitHigh ) { + out << "if ( " << CKEY(midKey) << " <= " << ck << " )\n {"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + COND_GOTO(midTrans) << "\n"; + } + } +} + +void Goto::STATE_GOTO_ERROR() +{ + /* Bail out immediately. */ + out << " goto " << _again << ";\n"; +} + +void Goto::FROM_STATE_ACTION_EMIT( RedStateAp *state ) +{ + if ( state->fromStateAction != 0 ) { + /* Write every action in the list. */ + for ( GenActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( state->id, false, + state->fromStateAction->anyNextStmt() ) ); + out << "\n"; + } + } +} + +std::ostream &Goto::STATE_CASES() +{ + bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Writing code above state gotos. */ + GOTO_HEADER( st ); + + FROM_STATE_ACTION_EMIT( st ); + + if ( !noEnd && eof ) { + out << + "if ( " << P() << " == " << vEOF() << " ) {\n"; + + if ( st->eofTrans != 0 ) + TRANS_GOTO( st->eofTrans ); + + out << + " goto " << _again << ";\n" + "}\n" + "else {\n"; + } + + if ( st == redFsm->errState ) + STATE_GOTO_ERROR(); + else { + /* Try singles. */ + if ( st->outSingle.length() > 0 ) + SINGLE_SWITCH( st ); + + /* Default case is to binary search for the ranges, if that fails then */ + if ( st->outRange.length() > 0 ) { + RANGE_B_SEARCH( st, keyOps->minKey, keyOps->maxKey, + 0, st->outRange.length() - 1 ); + } + + /* Write the default transition. 
*/ + TRANS_GOTO( st->defTrans ) << "\n"; + } + + if ( !noEnd && eof ) { + out << + "}\n"; + } + } + return out; +} + +std::ostream &Goto::TRANSITION( RedCondPair *pair ) +{ + /* Write the label for the transition so it can be jumped to. */ + if ( ctrLabel[pair->id].isReferenced ) + out << "_ctr" << pair->id << ": "; + + /* Destination state. */ + if ( pair->action != 0 && pair->action->anyCurStateRef() ) + out << ps << " = " << vCS() << ";"; + out << vCS() << " = " << pair->targ->id << "; "; + + if ( pair->action != 0 ) { + /* Write out the transition func. */ + out << "goto f" << pair->action->actListId << ";\n"; + } + else { + /* No code to execute, just loop around. */ + out << "goto " << _again << ";\n"; + } + return out; +} + +std::ostream &Goto::TRANSITIONS() +{ + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + if ( trans->condSpace == 0 ) + TRANSITION( &trans->p ); + } + + for ( CondApSet::Iter cond = redFsm->condSet; cond.lte(); cond++ ) + TRANSITION( &cond->p ); + + return out; +} + +unsigned int Goto::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + return act; +} + +unsigned int Goto::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + return act; +} + +unsigned int Goto::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->location+1; + return act; +} + +void Goto::taToStateActions() +{ + toStateActions.start(); + + /* One slot per state in the list, indexed by state id. NOTE(review): vals holds stateList.length() slots but the loop below reads nextStateId entries; confirm the two always agree. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = TO_STATE_ACTION(st); + + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write the to-state action value for this state id (0 when none). 
*/ + toStateActions.value( vals[st] ); + } + delete[] vals; + + toStateActions.finish(); +} + +void Goto::taFromStateActions() +{ + fromStateActions.start(); + + /* One slot per state in the list, indexed by state id. NOTE(review): vals holds stateList.length() slots but the loop below reads nextStateId entries; confirm the two always agree. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = FROM_STATE_ACTION(st); + + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write the from-state action value for this state id (0 when none). */ + fromStateActions.value( vals[st] ); + } + delete[] vals; + + fromStateActions.finish(); +} + +void Goto::taEofActions() +{ + eofActions.start(); + + /* One slot per state in the list, indexed by state id. NOTE(review): vals holds stateList.length() slots but the loop below reads nextStateId entries; confirm the two always agree. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = EOF_ACTION(st); + + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write any eof action. */ + eofActions.value( vals[st] ); + } + delete[] vals; + + eofActions.finish(); +} + +void Goto::taNfaOffsets() +{ + nfaOffsets.start(); + + /* Offset of zero means no NFA targs, real targs start at 1. */ + long offset = 1; + + /* One slot per state in the list, indexed by state id. NOTE(review): vals holds stateList.length() slots but the loop below reads nextStateId entries; confirm the two always agree. 
*/ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs == 0 ) { + vals[st->id] = 0; + //nfaOffsets.value( 0 ); + } + else { + vals[st->id] = offset; + //nfaOffsets.value( offset ); + offset += 1 + st->nfaTargs->length(); + } + } + + for ( int st = 0; st < redFsm->nextStateId; st++ ) + nfaOffsets.value( vals[st] ); + delete[] vals; + + nfaOffsets.finish(); +} + +void Goto::taNfaTargs() +{ + nfaTargs.start(); + + /* Offset of zero means no NFA targs, put a filler there. */ + nfaTargs.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaTargs.value( st->nfaTargs->length() ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + nfaTargs.value( targ->state->id ); + } + } + + nfaTargs.finish(); +} + +/* These need to mirror nfa targs. 
*/ +void Goto::taNfaPushActions() +{ + nfaPushActions.start(); + + nfaPushActions.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaPushActions.value( 0 ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_PUSH_ACTION( targ ); + } + } + + nfaPushActions.finish(); +} + +void Goto::taNfaPopTrans() +{ + nfaPopTrans.start(); + + nfaPopTrans.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaPopTrans.value( 0 ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_POP_TEST( targ ); + } + } + + nfaPopTrans.finish(); +} + +void Goto::EOF_CHECK( ostream &ret ) +{ + ret << + " if ( " << P() << " == " << PE() << " )\n" + " goto " << _test_eof << ";\n"; +} + +void Goto::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << gotoDest << "; "; + + if ( inFinish && !noEnd ) + EOF_CHECK( ret ); + + ret << "goto " << _again << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void Goto::GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";"; + + if ( inFinish && !noEnd ) + EOF_CHECK( ret ); + + ret << " goto " << _again << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void Goto::CURS( ostream &ret, bool inFinish ) +{ + ret << "(" << ps << ")"; +} + +void Goto::TARGS( ostream &ret, bool inFinish, int targState ) +{ + ret << "(" << vCS() << ")"; +} + +void Goto::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << vCS() << " = " << nextDest << ";"; +} + +void Goto::NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << vCS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << ");"; +} + +void Goto::CALL( ostream &ret, int callDest, int targState, 
bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << + TOP() << " += 1;" << vCS() << " = " << + callDest << ";"; + + if ( inFinish && !noEnd ) + EOF_CHECK( ret ); + + ret << " goto " << _again << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void Goto::NCALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << + TOP() << " += 1;" << vCS() << " = " << + callDest << "; " << CLOSE_GEN_BLOCK(); +} + +void Goto::CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << TOP() << " += 1;" << + vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, targState, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";"; + + if ( inFinish && !noEnd ) + EOF_CHECK( ret ); + + ret << " goto " << _again << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void Goto::NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << TOP() << " += 1;" << + vCS() << " = " << 
OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, targState, inFinish, false ); + ret << CLOSE_HOST_EXPR() << "; " << CLOSE_GEN_BLOCK(); +} + +void Goto::RET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << "-= 1;" << vCS() << " = " << STACK() << "[" << TOP() << "];"; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + if ( inFinish && !noEnd ) + EOF_CHECK( ret ); + + ret << "goto " << _again << ";" << CLOSE_GEN_BLOCK(); +} + +void Goto::NRET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << "-= 1;" << vCS() << " = " << STACK() << "[" << TOP() << "];"; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << CLOSE_GEN_BLOCK(); +} + +void Goto::BREAK( ostream &ret, int targState, bool csForced ) +{ + ret << OPEN_GEN_BLOCK() << P() << " += 1; " << "goto " << _out << "; " << CLOSE_GEN_BLOCK(); +} + +void Goto::NBREAK( ostream &ret, int targState, bool csForced ) +{ + ret << OPEN_GEN_BLOCK() << P() << " += 1; " << nbreak << " = 1; " << CLOSE_GEN_BLOCK(); +} + +void Goto::tableDataPass() +{ + if ( type == Loop ) + taActions(); + + taToStateActions(); + taFromStateActions(); + taEofActions(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); +} + +void Goto::genAnalysis() +{ + /* For directly executable machines there is no required state + * ordering. Choose a depth-first ordering to increase the + * potential for fall-throughs. */ + redFsm->depthFirstOrdering(); + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Choose single. */ + redFsm->moveSelectTransToSingle(); + + /* If any errors have occurred in the input file then don't write anything. 
*/ + if ( red->id->errorCount > 0 ) + return; + + /* Analyze Machine will find the final action reference counts, among other + * things. We will use these in reporting the usage of fsm directives in + * action code. */ + red->analyzeMachine(); + + /* Run the analysis pass over the table data. */ + setTableState( TableArray::AnalyzePass ); + tableDataPass(); + + /* Switch the tables over to the code gen mode. */ + setTableState( TableArray::GeneratePass ); +} + +void Goto::writeData() +{ + if ( type == Loop ) { + if ( redFsm->anyActions() ) + taActions(); + } + + if ( redFsm->anyToStateActions() ) + taToStateActions(); + + if ( redFsm->anyFromStateActions() ) + taFromStateActions(); + + if ( redFsm->anyEofActions() ) + taEofActions(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); + + STATE_IDS(); +} + +void Goto::writeExec() +{ + int maxCtrId = redFsm->nextCondId > redFsm->nextTransId ? redFsm->nextCondId : redFsm->nextTransId; + ctrLabel = allocateLabels( ctrLabel, IpLabel::Ctr, maxCtrId ); + + out << "{\n"; + + DECLARE( INT(), cpc ); + DECLARE( INT(), ck ); + DECLARE( INT(), pop_test ); + DECLARE( INT(), nbreak ); + DECLARE( INT(), ps, " = 0" ); + DECLARE( INT(), new_recs ); + DECLARE( INT(), alt ); + DECLARE( INDEX( ARR_TYPE( actions ) ), acts ); + DECLARE( UINT(), nacts ); + + out << "\n"; + + out << EMIT_LABEL( _resume ); + + /* Do we break out on no more input. 
*/ + bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates(); + if ( !noEnd ) { + if ( eof ) { + out << + " if ( " << P() << " == " << PE() << " && " << P() << " != " << vEOF() << " )\n" + " goto " << _out << ";\n"; + } + else { + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto " << _out << ";\n"; + } + } + + NFA_PUSH( vCS() ); + + out << + " switch ( " << vCS() << " ) {\n"; + STATE_CASES() << + " }\n" + "\n"; + TRANSITIONS() << + "\n"; + + if ( redFsm->anyRegActions() ) + EXEC_FUNCS() << "\n"; + + out << EMIT_LABEL( _again ); + + if ( !noEnd && eof ) { + out << + " if ( " << P() << " == " << vEOF() << " ) {\n" + " if ( " << vCS() << " >= " << FIRST_FINAL_STATE() << " )\n" + " goto " << _out << ";\n" + " }\n" + " else {\n"; + } + + TO_STATE_ACTIONS(); + + if ( redFsm->errState != 0 ) { + out << + " if ( " << vCS() << " != " << redFsm->errState->id << " ) {\n"; + } + + out << + " " << P() << " += 1;\n" + " goto " << _resume << ";\n"; + + if ( redFsm->errState != 0 ) { + out << + " }\n"; + } + + if ( !noEnd && eof ) { + out << + " }\n"; + } + + if ( redFsm->anyNfaStates() ) { + out << + " if ( nfa_len == 0 )\n" + " goto " << _out << ";\n" + "\n" + " nfa_count += 1;\n" + " nfa_len -= 1;\n" + " " << P() << " = nfa_bp[nfa_len].p;\n" + ; + + if ( redFsm->bAnyNfaPops ) { + NFA_FROM_STATE_ACTION_EXEC(); + + NFA_POP_TEST_EXEC(); + + out << + " if ( " << pop_test << " )\n" + " " << vCS() << " = nfa_bp[nfa_len].state;\n" + " else\n" + " " << vCS() << " = " << ERROR_STATE() << ";\n"; + } + else { + out << + " " << vCS() << " = nfa_bp[nfa_len].state;\n"; + + } + + NFA_POST_POP(); + + out << "goto " << _resume << ";\n"; + } + + out << EMIT_LABEL( _out ); + + out << "}\n"; +} diff --git a/ragel/goto.h b/ragel/goto.h new file mode 100644 index 00000000..dcf13448 --- /dev/null +++ b/ragel/goto.h @@ -0,0 +1,226 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + 
* of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _C_GOTO_H +#define _C_GOTO_H + +#include <iostream> +#include "codegen.h" + +/* Forwards. */ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; +struct GenStateCond; + +struct IpLabel +{ + IpLabel() + : + type(None), + stid(0), + isReferenced(false) + {} + + enum Type + { + None = 1, + TestEof, + Ctr, + St, + Out, + Pop + }; + + std::string reference() + { + isReferenced = true; + return define(); + } + + std::string define() + { + std::stringstream ss; + switch ( type ) { + case None: break; + case TestEof: + ss << "_test_eof" << stid; + break; + case Ctr: + ss << "_ctr" << stid; + break; + case St: + ss << "_st" << stid; + break; + case Out: + ss << "_out" << stid; + break; + case Pop: + ss << "_pop" << stid; + break; + } + + return ss.str(); + } + + Type type; + int stid; + bool isReferenced; +}; + + +/* + * Goto driven fsm. 
+ */ +class Goto + : public CodeGen +{ +public: + enum Type { + Loop = 1, + Exp, + Ip + }; + + Goto( const CodeGenArgs &args, Type type ) + : + CodeGen( args ), + type(type), + acts( "_acts" ), + nacts( "_nacts" ), + ck( "_ck" ), + nbreak( "_nbreak" ), + ps( "_ps" ), + _out("_out"), + _pop("_pop"), + _again("_again"), + _resume("_resume"), + _test_eof("_test_eof"), + actions( "actions", *this ), + toStateActions( "to_state_actions", *this ), + fromStateActions( "from_state_actions", *this ), + eofActions( "eof_actions", *this ), + ctrLabel(0) + {} + + void tableDataPass(); + virtual void genAnalysis(); + virtual void writeData(); + virtual void writeExec(); + + std::ostream &TRANSITION( RedCondPair *pair ); + + void FROM_STATE_ACTION_EMIT( RedStateAp *state ); + + std::ostream &STATE_CASES(); + std::ostream &TRANSITIONS(); + + Type type; + + Variable acts; + Variable nacts; + Variable ck; + Variable nbreak; + Variable ps; + + GotoLabel _out; + GotoLabel _pop; + GotoLabel _again; + GotoLabel _resume; + GotoLabel _test_eof; + + TableArray actions; + TableArray toStateActions; + TableArray fromStateActions; + TableArray eofActions; + + IpLabel *ctrLabel; + + void taActions(); + void taToStateActions(); + void taFromStateActions(); + void taEofActions(); + void taNfaTargs(); + void taNfaOffsets(); + void taNfaPushActions(); + void taNfaPopTrans(); + + void EOF_CHECK( ostream &ret ); + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NCALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NEXT( ostream &ret, int nextDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void CURS( 
ostream &ret, bool inFinish ); + void TARGS( ostream &ret, bool inFinish, int targState ); + void RET( ostream &ret, bool inFinish ); + void NRET( ostream &ret, bool inFinish ); + void BREAK( ostream &ret, int targState, bool csForced ); + void NBREAK( ostream &ret, int targState, bool csForced ); + + virtual unsigned int TO_STATE_ACTION( RedStateAp *state ); + virtual unsigned int FROM_STATE_ACTION( RedStateAp *state ); + virtual unsigned int EOF_ACTION( RedStateAp *state ); + + virtual std::ostream &EXEC_FUNCS() = 0; + virtual std::ostream &TO_STATE_ACTION_SWITCH() = 0; + virtual std::ostream &FROM_STATE_ACTION_SWITCH() = 0; + virtual std::ostream &EOF_ACTION_SWITCH() = 0; + + std::ostream &ACTIONS_ARRAY(); + + void setTableState( TableArray::State ); + + virtual std::ostream &COND_GOTO( RedCondPair *trans ); + + string CKEY( CondKey key ); + void COND_B_SEARCH( RedTransAp *trans, CondKey lower, CondKey upper, int low, int high); + + virtual std::ostream &TRANS_GOTO( RedTransAp *trans ); + + void SINGLE_SWITCH( RedStateAp *state ); + void RANGE_B_SEARCH( RedStateAp *state, Key lower, Key upper, int low, int high ); + + /* Called from STATE_GOTOS just before writing the gotos */ + virtual void GOTO_HEADER( RedStateAp *state ); + virtual void STATE_GOTO_ERROR(); + + virtual void NFA_PUSH_ACTION( RedNfaTarg *targ ) = 0; + virtual void NFA_POP_TEST( RedNfaTarg *targ ) {} + virtual void NFA_FROM_STATE_ACTION_EXEC() = 0; + + void NFA_POP() {} + + virtual void FROM_STATE_ACTIONS() = 0; + virtual void TO_STATE_ACTIONS() = 0; + virtual void REG_ACTIONS() = 0; + virtual void EOF_ACTIONS() = 0; + + IpLabel *allocateLabels( IpLabel *labels, IpLabel::Type type, int n ); +}; + +#endif diff --git a/ragel/gotoexp.cc b/ragel/gotoexp.cc new file mode 100644 index 00000000..dea9029c --- /dev/null +++ b/ragel/gotoexp.cc @@ -0,0 +1,208 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a 
copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "gotoexp.h" +#include "redfsm.h" +#include "gendata.h" +#include "bstmap.h" +#include "parsedata.h" +#include "inputdata.h" + +std::ostream &GotoExp::EXEC_FUNCS() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numTransRefs > 0 ) { + /* We are at the start of a glob, write the case. */ + out << "f" << redAct->actListId << ":\n"; + + if ( redFsm->anyRegNbreak() ) + out << nbreak << " = 0;\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, IlOpts( 0, false, false ) ); + + if ( redFsm->anyRegNbreak() ) { + out << + " if ( " << nbreak << " == 1 )\n" + " goto " << _out << ";\n"; + } + + + out << "goto " << _again << ";\n"; + } + } + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. 
*/ +std::ostream &GotoExp::TO_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numToStateRefs > 0 ) { + /* Write the entry label. */ + out << "\t" << CASE( STR( redAct->actListId+1 ) ) << "{\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, IlOpts( 0, false, false ) ); + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &GotoExp::FROM_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numFromStateRefs > 0 ) { + /* Write the entry label. */ + out << "\t" << CASE( STR( redAct->actListId+1 ) ) << "{\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, IlOpts( 0, false, false ) ); + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &GotoExp::EOF_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numEofRefs > 0 ) { + /* Write the entry label. */ + out << "\t" << CASE( STR( redAct->actListId+1 ) ) << "{\n"; + + /* Write each action in the list of action items. 
*/ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, IlOpts( 0, true, false ) ); + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +unsigned int GotoExp::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->actListId+1; + return act; +} + +unsigned int GotoExp::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->actListId+1; + return act; +} + +unsigned int GotoExp::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->actListId+1; + return act; +} + +void GotoExp::NFA_PUSH_ACTION( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->push != 0 ) + act = targ->push->actListId+1; + nfaPushActions.value( act ); +} + +void GotoExp::NFA_POP_TEST( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->popTest != 0 ) + act = targ->popTest->actListId+1; + nfaPopTrans.value( act ); +} + + +void GotoExp::NFA_FROM_STATE_ACTION_EXEC() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " switch ( " << ARR_REF( fromStateActions ) << "[nfa_bp[nfa_len].state] ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + "\n"; + } +} + +void GotoExp::FROM_STATE_ACTIONS() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " switch ( " << ARR_REF( fromStateActions ) << "[" << vCS() << "] ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + "\n"; + } +} + +void GotoExp::TO_STATE_ACTIONS() +{ + if ( redFsm->anyToStateActions() ) { + out << + " switch ( " << ARR_REF( toStateActions ) << "[" << vCS() << "] ) {\n"; + TO_STATE_ACTION_SWITCH() << + " }\n" + "\n"; + } +} + +void GotoExp::REG_ACTIONS() +{ + +} + +void GotoExp::EOF_ACTIONS() +{ + if ( redFsm->anyEofActions() ) { + out << + " switch ( " << ARR_REF( eofActions ) << "[" << vCS() << "] ) {\n"; + EOF_ACTION_SWITCH() << + " }\n"; + } + +} diff --git a/ragel/gotoexp.h b/ragel/gotoexp.h new 
file mode 100644 index 00000000..ddb3f138 --- /dev/null +++ b/ragel/gotoexp.h @@ -0,0 +1,75 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef SWITCH_GOTO_EXP_H +#define SWITCH_GOTO_EXP_H + +#include <iostream> +#include "goto.h" + +/* Forwards. 
*/ +struct CodeGenData; + +/* + * class GotoExp + */ +class GotoExp + : public Goto +{ +public: + GotoExp( const CodeGenArgs &args ) + : Goto(args, Exp) {} + + virtual std::ostream &EXEC_FUNCS(); + virtual std::ostream &TO_STATE_ACTION_SWITCH(); + virtual std::ostream &FROM_STATE_ACTION_SWITCH(); + virtual std::ostream &EOF_ACTION_SWITCH(); + + unsigned int TO_STATE_ACTION( RedStateAp *state ); + unsigned int FROM_STATE_ACTION( RedStateAp *state ); + unsigned int EOF_ACTION( RedStateAp *state ); + + virtual void NFA_PUSH_ACTION( RedNfaTarg *targ ); + virtual void NFA_POP_TEST( RedNfaTarg *targ ); + virtual void NFA_FROM_STATE_ACTION_EXEC(); + + virtual void FROM_STATE_ACTIONS(); + virtual void TO_STATE_ACTIONS(); + virtual void REG_ACTIONS(); + virtual void EOF_ACTIONS(); +}; + +namespace C +{ + class GotoExp + : + public ::GotoExp + { + public: + GotoExp( const CodeGenArgs &args ) + : ::GotoExp( args ) + {} + }; +} + + +#endif diff --git a/ragel/gotoloop.cc b/ragel/gotoloop.cc new file mode 100644 index 00000000..dc536e6d --- /dev/null +++ b/ragel/gotoloop.cc @@ -0,0 +1,227 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "gotoloop.h" +#include "redfsm.h" +#include "bstmap.h" +#include "gendata.h" +#include "parsedata.h" +#include "inputdata.h" + +std::ostream &GotoLoop::ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numTransRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t" << CASE( STR( act->actionId ) ) << "{\n"; + ACTION( out, act, IlOpts( 0, false, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &GotoLoop::EOF_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numEofRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t" << CASE( STR( act->actionId ) ) << "{\n"; + ACTION( out, act, IlOpts( 0, true, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &GotoLoop::FROM_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numFromStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t" << CASE( STR( act->actionId ) ) << "{\n"; + ACTION( out, act, IlOpts( 0, false, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &GotoLoop::TO_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. 
*/ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numToStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t" << CASE( STR( act->actionId ) ) << "{\n"; + ACTION( out, act, IlOpts( 0, false, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +void GotoLoop::NFA_PUSH_ACTION( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->push != 0 ) + act = targ->push->actListId+1; + nfaPushActions.value( act ); +} + +void GotoLoop::NFA_POP_TEST( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->popTest != 0 ) + act = targ->popTest->actListId+1; + nfaPopTrans.value( act ); +} + +std::ostream &GotoLoop::EXEC_FUNCS() +{ + /* Make labels that set acts and jump to execFuncs. Loop func indices. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numTransRefs > 0 ) { + out << " f" << redAct->actListId << ": " << + "" << acts << " = " << OFFSET( ARR_REF( actions ), itoa( redAct->location+1 ) ) << ";" + " goto execFuncs;\n"; + } + } + + out << + "\n" + "execFuncs:\n"; + + if ( redFsm->anyRegNbreak() ) + out << nbreak << " = 0;\n"; + + out << + " " << nacts << " = " << CAST( UINT() ) << DEREF( ARR_REF( actions ), "" + string(acts) + "" ) << ";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "" + string(acts) + "" ) << " ) {\n"; + ACTION_SWITCH() << + " }\n" + " " << acts << " += 1;\n" + " " << nacts << " -= 1;\n" + " }\n" + "\n"; + + if ( redFsm->anyRegNbreak() ) { + out << + " if ( " << nbreak << " == 1 )\n" + " goto " << _out << ";\n"; + } + + /* Reference the label through its GotoLabel object, as GotoExp::EXEC_FUNCS does, rather than hard-coding its name. */ + out << + " goto " << _again << ";\n"; + return out; +} + +void GotoLoop::NFA_FROM_STATE_ACTION_EXEC() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), ARR_REF( fromStateActions ) + "[nfa_bp[nfa_len].state]" ) << ";\n" + " " << nacts << " = " << 
CAST( UINT() ) << DEREF( ARR_REF( actions ), "" + string(acts) + "" ) << ";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "" + string(acts) + "" ) << " ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + " " << nacts << " -= 1;\n" + " " << acts << " += 1;\n" + " }\n" + "\n"; + } +} + +void GotoLoop::FROM_STATE_ACTIONS() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), + ARR_REF( fromStateActions ) + "[" + vCS() + "]" ) << ";\n" + " " << nacts << " = " << CAST( UINT() ) << DEREF( ARR_REF( actions ), "" + string(acts) + "" ) << "; " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "" + string(acts) + "" ) << " ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + " " << acts << " += 1;\n" + " " << nacts << " -= 1;\n" + " }\n" + "\n"; + } +} + +void GotoLoop::TO_STATE_ACTIONS() +{ + if ( redFsm->anyToStateActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), + ARR_REF( toStateActions ) + "[" + vCS() + "]" ) << ";\n" + " " << nacts << " = " << CAST( UINT() ) << DEREF( ARR_REF( actions ), "" + string(acts) + "" ) << "; " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "" + string(acts) + "" ) << " ) {\n"; + TO_STATE_ACTION_SWITCH() << + " }\n" + " " << acts << " += 1;\n" + " " << nacts << " -= 1;\n" + " }\n" + "\n"; + } +} + +void GotoLoop::REG_ACTIONS() +{ +} + +void GotoLoop::EOF_ACTIONS() +{ + if ( redFsm->anyEofActions() ) { + out << + " " << INDEX( ARR_TYPE( actions ), "__acts" ) << ";\n" + " " << UINT() << " __nacts;\n" + " __acts = " << OFFSET( ARR_REF( actions ), + ARR_REF( eofActions ) + "[" + vCS() + "]" ) << ";\n" + " __nacts = " << CAST( UINT() ) << DEREF( ARR_REF( actions ), "__acts" ) << "; __acts += 1;\n" + " while ( __nacts > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "__acts" ) << " ) 
{\n"; + EOF_ACTION_SWITCH() << + " }\n" + " __acts += 1;\n" + " __nacts -= 1;\n" + " }\n"; + } +} diff --git a/ragel/gotoloop.h b/ragel/gotoloop.h new file mode 100644 index 00000000..68c43ce2 --- /dev/null +++ b/ragel/gotoloop.h @@ -0,0 +1,72 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef SWITCH_GOTO_LOOP_H +#define SWITCH_GOTO_LOOP_H + +#include <iostream> +#include "goto.h" + +/* Forwards. 
*/ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; +struct GenStateCond; + +class GotoLoop + : public Goto +{ +public: + GotoLoop( const CodeGenArgs &args ) + : Goto(args, Loop) {} + + virtual std::ostream &ACTION_SWITCH(); + virtual std::ostream &EXEC_FUNCS(); + virtual std::ostream &TO_STATE_ACTION_SWITCH(); + virtual std::ostream &FROM_STATE_ACTION_SWITCH(); + virtual std::ostream &EOF_ACTION_SWITCH(); + + virtual void NFA_PUSH_ACTION( RedNfaTarg *targ ); + virtual void NFA_POP_TEST( RedNfaTarg *targ ); + virtual void NFA_FROM_STATE_ACTION_EXEC(); + + virtual void FROM_STATE_ACTIONS(); + virtual void TO_STATE_ACTIONS(); + virtual void REG_ACTIONS(); + virtual void EOF_ACTIONS(); +}; + +namespace C +{ + class GotoLoop + : + public ::GotoLoop + { + public: + GotoLoop( const CodeGenArgs &args ) + : ::GotoLoop( args ) + {} + }; +} + +#endif diff --git a/ragel/host-asm/.gitignore b/ragel/host-asm/.gitignore new file mode 100644 index 00000000..b90cf7dc --- /dev/null +++ b/ragel/host-asm/.gitignore @@ -0,0 +1,11 @@ +/Makefile.in +/Makefile +/.deps +/.libs +/ragel-asm +/ragel-asm.exe +/rlparse.c +/rlreduce.cc + +/CMakeFiles +/cmake_install.cmake diff --git a/ragel/host-asm/CMakeLists.txt b/ragel/host-asm/CMakeLists.txt new file mode 100644 index 00000000..d1f22faa --- /dev/null +++ b/ragel/host-asm/CMakeLists.txt @@ -0,0 +1,24 @@ +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS rlparse.lm + COMMAND colm::colm + ARGS -I.. 
-c -b rlparseAsm + -o "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" + # VERBATIM keeps argument escaping identical across generators/platforms. + VERBATIM) + +add_executable(ragel-asm main.cc + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + +target_link_libraries(ragel-asm PRIVATE libragel libfsm) + +if(${PROJECT_NAME}_MAKE_INSTALL) + install(TARGETS ragel-asm + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() diff --git a/ragel/host-asm/Makefile.am b/ragel/host-asm/Makefile.am new file mode 100644 index 00000000..811e1049 --- /dev/null +++ b/ragel/host-asm/Makefile.am @@ -0,0 +1,25 @@ +COLM = @COLM@ +COLM_LD = @COLM_LD@ +COLM_LIBDEP = @COLM_LIBDEP@ +COLM_BINDEP = @COLM_BINDEP@ + +bin_PROGRAMS = ragel-asm + +ragel_asm_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_ragel_asm_SOURCES = main.cc + +nodist_ragel_asm_SOURCES = \ + rlparse.c rlreduce.cc + +ragel_asm_LDADD = ../libragel.la ../libfsm.la $(COLM_LD) + +BUILT_SOURCES = rlparse.c rlreduce.cc + +EXTRA_DIST = rlparse.lm + +rlparse.c: rlparse.lm $(COLM_BINDEP) + $(COLM) -I.. 
-c -b rlparseAsm -o $@ -m rlreduce.cc $< + +rlreduce.cc: rlparse.c + diff --git a/ragel/host-asm/main.cc b/ragel/host-asm/main.cc new file mode 100644 index 00000000..21f8a515 --- /dev/null +++ b/ragel/host-asm/main.cc @@ -0,0 +1,33 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inputdata.h" +#include "asm.h" + +extern struct colm_sections rlparseAsm; + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangAsm, &rlparseAsm, 0 ); + return id.main( argc, argv ); +} + diff --git a/ragel/host-asm/rlparse.lm b/ragel/host-asm/rlparse.lm new file mode 100644 index 00000000..fd0b2c29 --- /dev/null +++ b/ragel/host-asm/rlparse.lm @@ -0,0 +1,204 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + typeid<var_ref>, input->pull( match_length ) ) ) + } + else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. 
+ input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end + +end + + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any +end + +reduction TopLevel + + # Pass Through. + # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? string] :ImportDefStr + # | [ident whitespace? `= whitespace? number] :ImportAssignNum + # | [ident whitespace? `= whitespace? 
string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + + +export RagelError: str + +# File name. The open is expected to succeed. It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. 
+while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/host-c/.gitignore b/ragel/host-c/.gitignore new file mode 100644 index 00000000..4759c814 --- /dev/null +++ b/ragel/host-c/.gitignore @@ -0,0 +1,12 @@ +/.deps/ +/.libs/ +/Makefile +/Makefile.in +/ragel-c +/ragel-c.exe +/rlhc.c +/rlparse.c +/rlreduce.cc + +/CMakeFiles +/cmake_install.cmake diff --git a/ragel/host-c/CMakeLists.txt b/ragel/host-c/CMakeLists.txt new file mode 100644 index 00000000..1c97a41c --- /dev/null +++ b/ragel/host-c/CMakeLists.txt @@ -0,0 +1,34 @@ +# Generate the Colm-based parser and reducer for the C host language. +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS rlparse.lm + COMMAND colm::colm + ARGS -I.. -c -b rlparseC + -o "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" + VERBATIM) + +# Generate the host-code translator. rlhc.lm includes ril.lm from the parent +# directory, so it is listed as a dependency (matching the Makefile.am rule). +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + DEPENDS rlhc.lm "${CMAKE_CURRENT_LIST_DIR}/../ril.lm" + COMMAND colm::colm + ARGS -I.. 
-c -b rlhcC + -o "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + rlhc.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" + VERBATIM) + +add_executable(ragel-c main.cc + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + +target_link_libraries(ragel-c PRIVATE libragel libfsm) + +if(${PROJECT_NAME}_MAKE_INSTALL) + install(TARGETS ragel-c + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() diff --git a/ragel/host-c/Makefile.am b/ragel/host-c/Makefile.am new file mode 100644 index 00000000..45ffa468 --- /dev/null +++ b/ragel/host-c/Makefile.am @@ -0,0 +1,27 @@ +COLM = @COLM@ +COLM_LD = @COLM_LD@ +COLM_LIBDEP = @COLM_LIBDEP@ +COLM_BINDEP = @COLM_BINDEP@ + +bin_PROGRAMS = ragel-c + +ragel_c_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_ragel_c_SOURCES = main.cc rlhc.c + +nodist_ragel_c_SOURCES = \ + rlparse.c rlreduce.cc + +ragel_c_LDADD = ../libragel.la ../libfsm.la $(COLM_LD) + +BUILT_SOURCES = rlparse.c rlreduce.cc + +EXTRA_DIST = rlparse.lm rlhc.lm + +rlparse.c: rlparse.lm $(COLM_BINDEP) + $(COLM) -I.. 
-c -b rlhcC -o $@ $< diff --git a/ragel/host-c/main.cc b/ragel/host-c/main.cc new file mode 100644 index 00000000..20c7e6da --- /dev/null +++ b/ragel/host-c/main.cc @@ -0,0 +1,47 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inputdata.h" + +extern struct colm_sections rlparseC; +extern struct colm_sections rlhcC; + +const HostLang hostLangC_translated = +{ + hostTypesC, + 8, + 0, + true, + false, /* loopLabels */ + Translated, + GotoFeature, + &makeCodeGen, + &defaultOutFnC, + &genLineDirectiveTrans +}; + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangC_translated, &rlparseC, &rlhcC ); + return id.rlhcMain( argc, argv ); +} + diff --git a/ragel/host-c/rlhc.lm b/ragel/host-c/rlhc.lm new file mode 100644 index 00000000..4e44a5f5 --- /dev/null +++ b/ragel/host-c/rlhc.lm @@ -0,0 +1,459 @@ +include 'ril.lm' + +namespace c_out + + token _IN_ /''/ + token _EX_ /''/ + + lex + token comment / + '//' any* :> '\n' | + '/*' any* :>> '*/' + / + + token id + /[a-zA-Z_][a-zA-Z_0-9]*/ + + token number / + [0-9]+ + / + + token symbol / + '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | + '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | + '=' | '>' | '?' | '@' | '[' | ']' | '^' | '|' | + '~' / + + literal `{ `} + + token string / + '"' ( [^"\\] | '\\' any ) * '"' | + "'" ( [^'\\] | '\\' any ) * "'" + / + + ignore + /[ \t\v\r\n]+/ + end + + def item + [comment] + | [id] + | [number] + | [symbol] + | [string] + | [`{ _IN_ item* _EX_ `} ] + + def c_out + [_IN_ _EX_ item*] +end + +namespace c_gen + global _: parser<c_out::c_out> + + void tok_list( TL: host::tok* ) + { + for Tok: host::tok in repeat(TL) { + switch Tok + case Stmt { + "{ + " [stmt_list( StmtList )] + "} + } + case Expr { + "([expr( Expr )])" + } + case Escape { + Str: str = $escape + "[Str.suffix( 1 )]" + } + default { + [Tok] + } + } + } + + void embedded_host( EmbeddedHost: embedded_host ) + { + switch EmbeddedHost + case Expr + { + "([tok_list( TL )])" + } + case Stmt + { + "{ + " [tok_list( TL )] + "} + } + case Bare + { + [tok_list( TL )] + } + } + + void expr_factor( ExprFactor: expr_factor ) + { + switch ExprFactor + case [EH: embedded_host] + { + [embedded_host( EH )] + } + case Paren + { + 
"([expr( expr )])" + } + case ArraySub + { + "[ident]\[[expr( expr )]\]" + } + case ArraySubField + { + "[ident]\[[expr( expr )]\].[Field]" + } + case Offset + { + "[ident] + [expr( expr )]" + } + case Deref + { + "(*( [expr(expr)] )) + } + case [`TRUE] + { + "1" + } + case [`FALSE] + { + # FALSE is emitted as the integer literal 0 (TRUE is emitted as 1). + "0" + } + case [N: `nil] + { + "0" + } + case [Number: number] + { + number( Number ) + } + case [E1: embedded_host `-> E2: expr_factor] + { + # The accessor operator is contained within the lhs. + embedded_host( E1 ) + expr_factor( E2 ) + } + case [`cast `( T: type `) F: expr_factor] + { + "( [type( T )] ) [expr_factor( F )]" + } + default { + # Catches cases not specified + [ExprFactor] + } + } + + void lvalue( ExprFactor: lvalue ) + { + switch ExprFactor + case [EH: embedded_host] + { + [embedded_host( EH )] + } + case [ident O: `[ TL: expr C: `]] + { + [ident O expr( TL ) C] + } + case [I: ident `[ E: expr `] `. F: ident] + { + "[I]\[[ expr( E )]\].[F] + } + case [E1: embedded_host `-> E2: lvalue] + { + # The accessor operator is contained within the lhs. + embedded_host( E1 ) + lvalue( E2 ) + } + default { + # Catches cases not specified + [ExprFactor] + } + } + + void expr_factor_op( ExprFactorOp: expr_factor_op ) + { + switch ExprFactorOp + case [B: `! expr_factor_op] + { + ['!' 
expr_factor_op( _expr_factor_op )] + } + case [T: `~ expr_factor_op] + { + ['~' expr_factor_op( _expr_factor_op )] + } + case [expr_factor] + { + [expr_factor( expr_factor )] + } + } + + void expr_bitwise( ExprBitwise: expr_bitwise ) + { + switch ExprBitwise + case [expr_bitwise A: `& expr_factor_op] + { + [expr_bitwise( _expr_bitwise ) A expr_factor_op( expr_factor_op )] + } + case [expr_factor_op] + { + [expr_factor_op( expr_factor_op )] + } + } + + void expr_mult( ExprMult: expr_mult ) + { + switch ExprMult + case [expr_mult T: `* expr_bitwise] + { + [expr_mult( _expr_mult ) T expr_bitwise( expr_bitwise )] + } + case [expr_bitwise] + { + [expr_bitwise( expr_bitwise )] + } + } + + void expr_add( ExprAdd: expr_add ) + { + switch ExprAdd + case [expr_add Op: add_op expr_mult] + { + [expr_add( _expr_add ) Op expr_mult( expr_mult )] + } + case [expr_mult] + { + [expr_mult( expr_mult )] + } + } + + void expr_shift( ExprShift: expr_shift ) + { + switch ExprShift + case [expr_shift Op: shift_op expr_add] + { + [expr_shift( _expr_shift ) Op expr_add( expr_add )] + } + case [expr_add] + { + [expr_add( expr_add )] + } + } + + void expr_test( ExprTest: expr_test ) + { + switch ExprTest + case [expr_test Op: test_op expr_shift] + { + [expr_test( _expr_test ) Op expr_shift( expr_shift )] + } + case [expr_shift] + { + [expr_shift( expr_shift )] + } + } + + void expr( Expr: expr ) + { + expr_test( Expr.expr_test ) + } + + void type( Type: type ) + { + switch Type + case S8 + ['signed char '] + case S16 + ['short '] + case S32 + ['int '] + case S64 + ['long '] + case S128 + ['long long '] + case "uint" + ['unsigned int '] + default + [Type] + } + + void number( Number: number ) + { + switch Number + case Unsigned + "[uint]u" + default + [Number] + } + + void num_list( NumList: num_list ) + { + for Number: number in NumList + "[number( Number )], " + } + + void stmt( Stmt: stmt ) + { + switch Stmt + case [EH: embedded_host] + { + [embedded_host( EH )] + } + case [A: static_array] 
{ + "static const [type(A.type)] " + "[A.ident] \[\] = { [num_list(A.num_list)] }; + } + case [V: static_value] { + "static const [V.type] [V.ident] = [V.number]; + } + case [ + `if `( IfExpr: expr `) + IfStmt: stmt + ElseIfClauseList: else_if_clause* + ElseClauseOpt: else_clause? + ] { + "if ( [expr(IfExpr)] ) + " [stmt(IfStmt)] + + for ElseIfClause: else_if_clause in repeat( ElseIfClauseList ) { + match ElseIfClause + [`else `if `( ElseIfExpr: expr `) ElseIfStmt: stmt] + + "else if ( [expr(ElseIfExpr)] ) + " [stmt(ElseIfStmt)] + } + + if ( match ElseClauseOpt ['else' ElseStmt: stmt] ) { + "else + " [stmt(ElseStmt)] + } + } + case [`while `( WhileExpr: expr `) WhileStmt: stmt] { + "while ( [expr(WhileExpr)] ) + " [stmt(WhileStmt)] + } + case [M: match_stmt] { + "switch ( [expr(M.E)] ) { + + for PB: pat_block in repeat( M.P ) { + "case [expr( PB.expr )]: + "[stmt_list( PB._repeat_stmt )] + "break; + } + + if match M.D [D: default_block] { + "default: + "[stmt_list( D._repeat_stmt )] + "break; + } + + "} + } + case [`switch `( SwitchExpr: expr `) `{ StmtList: stmt* `}] { + "switch ( [expr(SwitchExpr)] ) { + " [stmt_list(StmtList)] + "} + } + case [ES: expr_stmt] { + "[expr(ES.expr)]; + } + case [B: block] { + "{ + " [stmt_list(B.StmtList)] + "} + } + case [ + OptConst: opt_const Type: type + Ident: ident OptInit: opt_init Semi: `; + ] + { + "[OptConst] [type(Type)] [Ident]" + + if match OptInit [`= Init: expr] { + " = [expr(Init)] + } + + "; + } + case [Export: export_stmt] + { + "#define [Export.ident] [number(Export.number)] + } + case [fallthrough] + { + # Nothing needed here. + # C falls through by default. 
+ } + case [Index: index_stmt] + { + "const [type(Index.type)] *[Index.ident] + + if match Index.opt_init [E: `= expr] { + [E expr(Index.opt_init.expr)] + } + + "; + } + case [CB: case_block] + { + "case [expr( CB.expr )]: + "[stmt_list( CB._repeat_stmt )] + "break; + } + case [DB: default_block] + { + "default: + "[stmt_list( DB._repeat_stmt )] + "break; + } + case [CL: case_label] + { + "case [expr( CL.expr )]: + } + case [AS: assign_stmt] + { + "[lvalue(AS.LValue) AS.assign_op expr(AS.expr)]; + } + default { + # catches unspecified cases + [Stmt] + } + } + + void stmt_list( StmtList: stmt* ) + { + for Stmt: stmt in repeat( StmtList ) + stmt( Stmt ) + } + + void trans( Output: stream, Start: start ) + { + _ = new parser<c_out::c_out>() + + stmt_list( Start._repeat_stmt ) + + CO: c_out::c_out = _->finish() + + if CO { + send Output + [CO] + } + else { + send stderr + "failed to parse output: [_->error] + } + } +end + +void trans( Output: stream, Start: start ) +{ + c_gen::trans( Output, Start ) +} + +include 'rlhc-main.lm' diff --git a/ragel/host-c/rlparse.lm b/ragel/host-c/rlparse.lm new file mode 100644 index 00000000..3d315492 --- /dev/null +++ b/ragel/host-c/rlparse.lm @@ -0,0 +1,203 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + typeid<var_ref>, input->pull( match_length ) ) ) + } + else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. 
+ input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end +end + + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any +end + +reduction TopLevel + + # Pass Through. + # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? string] :ImportDefStr + # | [ident whitespace? `= whitespace? number] :ImportAssignNum + # | [ident whitespace? `= whitespace? 
string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + + +export RagelError: str + +# File name. The open is expected to succeed. It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. 
+while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/host-crack/.gitignore b/ragel/host-crack/.gitignore new file mode 100644 index 00000000..e75770cb --- /dev/null +++ b/ragel/host-crack/.gitignore @@ -0,0 +1,12 @@ +/Makefile +/Makefile.in +/.deps +/.libs +/ragel-crack +/ragel-crack.exe +/rlhc.c +/rlparse.c +/rlreduce.cc + +/CMakeFiles +/cmake_install.cmake diff --git a/ragel/host-crack/CMakeLists.txt b/ragel/host-crack/CMakeLists.txt new file mode 100644 index 00000000..e28b4fa7 --- /dev/null +++ b/ragel/host-crack/CMakeLists.txt @@ -0,0 +1,34 @@ +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS rlparse.lm + COMMAND colm::colm + ARGS -I.. -c -b rlparseCrack + -o "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" VERBATIM) + +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + DEPENDS rlhc.lm + COMMAND colm::colm + ARGS -I.. 
-c -b rlhcCrack + -o "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + rlhc.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" VERBATIM) + +add_executable(ragel-crack main.cc + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + +target_link_libraries(ragel-crack PRIVATE libragel libfsm) + +if(${PROJECT_NAME}_MAKE_INSTALL) + install(TARGETS ragel-crack + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() diff --git a/ragel/host-crack/Makefile.am b/ragel/host-crack/Makefile.am new file mode 100644 index 00000000..98da9e06 --- /dev/null +++ b/ragel/host-crack/Makefile.am @@ -0,0 +1,27 @@ +COLM = @COLM@ +COLM_LD = @COLM_LD@ +COLM_LIBDEP = @COLM_LIBDEP@ +COLM_BINDEP = @COLM_BINDEP@ + +bin_PROGRAMS = ragel-crack + +ragel_crack_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_ragel_crack_SOURCES = main.cc rlhc.c + +nodist_ragel_crack_SOURCES = \ + rlparse.c rlreduce.cc + +ragel_crack_LDADD = ../libragel.la ../libfsm.la $(COLM_LD) + +BUILT_SOURCES = rlparse.c rlreduce.cc rlhc.c + +EXTRA_DIST = rlparse.lm rlhc.lm + +rlparse.c: rlparse.lm $(COLM_BINDEP) + $(COLM) -I.. -c -b rlparseCrack -o $@ -m rlreduce.cc $< + +rlreduce.cc: rlparse.c + +rlhc.c: rlhc.lm + $(COLM) -I.. 
-c -b rlhcCrack -o $@ $< diff --git a/ragel/host-crack/main.cc b/ragel/host-crack/main.cc new file mode 100644 index 00000000..dd45fd1b --- /dev/null +++ b/ragel/host-crack/main.cc @@ -0,0 +1,63 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inputdata.h" + +extern struct colm_sections rlparseCrack; +extern struct colm_sections rlhcCrack; + +/* + * Crack + */ +const char *defaultOutFnCrack( const char *inputFileName ) +{ + return fileNameFromStem( inputFileName, ".crk" ); +} + +HostType hostTypesCrack[] = +{ + { "byte", 0, "byte", false, true, true, 0, 0, 0, UCHAR_MAX, 1 }, + { "int32", 0, "int32", true, true, false, S32BIT_MIN, S32BIT_MAX, 0, 0, 4 }, + { "uint32", 0, "uint32", false, true, false, 0, 0, 0, U32BIT_MAX, 4 }, + { "int", 0, "int", true, true, false, INT_MIN, INT_MAX, 0, 0, sizeof(int) }, + { "uint", 0, "uint", false, true, false, 0, 0, 0, UINT_MAX, sizeof(int) }, +}; + +const HostLang hostLangCrack = +{ + hostTypesCrack, + 5, + 0, + true, + false, /* loopLabels */ + Translated, + BreakFeature, + &makeCodeGen, + &defaultOutFnCrack, + &genLineDirectiveTrans +}; + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangCrack, &rlparseCrack, &rlhcCrack ); + return id.rlhcMain( argc, argv ); +} diff --git a/ragel/host-crack/ragel-crack.lm b/ragel/host-crack/ragel-crack.lm new file mode 100644 index 00000000..b6480012 --- /dev/null +++ b/ragel/host-crack/ragel-crack.lm @@ -0,0 +1,150 @@ +namespace crack_inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token c_any / any / + end + + def inline_expr + [expr_item*] + + def expr_item + [expr_any] :ExprAny + | [expr_symbol] :ExprSymbol + | [expr_interpret] :ExprInterpret + + def expr_any + [whitespace] + | [comment] + | [string] + | [number] + | [hex_number] + | [ident] + | [c_any] + + def expr_symbol + [`,] | [`(] | [`)] | [`*] | [`::] + + def expr_interpret + [`fpc] :Fpc + | 
[`fc] :Fc + | [`fcurs] :Fcurs + | [`ftargs] :Ftargs + | [`fentry `( state_ref srlex::`)] :Fentry + + def state_ref + [opt_name_sep state_ref_names] + + def opt_name_sep + [srlex::`::] :ColonColon + | [] :Empty + + # List of names separated by :: + def state_ref_names + [state_ref_names srlex::`:: srlex::word] :Rec + | [srlex::word] :Base + + def inline_block + [block_item*] + + def block_item + [expr_any] :ExprAny + | [block_symbol] :BlockSymbol + | [block_interpret] :BlockInterpret + | [`{ inline_block `}] :RecBlock + + def block_symbol + [`,] | [`;] | [`(] | [`)] | [`*] | [`::] + + def block_interpret + [expr_interpret] :ExprInterpret + | [`fhold whitespace? `;] :Fhold + | [`fgoto whitespace? `* inline_expr `;] :FgotoExpr + | [`fnext whitespace? `* inline_expr `;] :FnextExpr + | [`fcall whitespace? `* inline_expr `;] :FcallExpr + | [`fncall whitespace? `* inline_expr `;] :FncallExpr + | [`fexec inline_expr `;] :Fexec + | [`fgoto state_ref srlex::`;] :FgotoSr + | [`fnext state_ref srlex::`;] :FnextSr + | [`fcall state_ref srlex::`;] :FcallSr + | [`fncall state_ref srlex::`;] :FncallSr + | [`fret `;] :Fret + | [`fnret `;] :Fnret + | [`fbreak `;] :Fbreak + | [`fnbreak `;] :Fnbreak +end + + +namespace crack_host + lex + literal `%%{ + + token slr /'%%' [^{] [^\n]* '\n'/ + { + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + rl bt_literal + / '`' ([^`\\] | NL | '\\' (any | NL))* '`' / + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + /c_comment | cpp_comment/ + + token string + / s_literal | d_literal | bt_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [ident] + | [number] + | [hex_number] + | [comment] + | [string] + | [whitespace] 
+ | [c_any] + + def section + [`%%{ ragel::ragel_start ragel::`}%%] :MultiLine + | [tok] :Tok +end + + diff --git a/ragel/host-crack/rlhc.lm b/ragel/host-crack/rlhc.lm new file mode 100644 index 00000000..e5d2d576 --- /dev/null +++ b/ragel/host-crack/rlhc.lm @@ -0,0 +1,533 @@ +include 'ril.lm' + +namespace crack_out + token _IN_ /''/ + token _EX_ /''/ + + lex + token comment / + '//' any* :> '\n' | + '/*' any* :>> '*/' + / + + token id + /[a-zA-Z_][a-zA-Z_0-9]*/ + + token number / + [0-9]+ + / + + token symbol / + '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | + '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | + '=' | '>' | '?' | '@' | '[' | ']' | '^' | '|' | + '~' / + + literal `{ `} + + token string / + '"' ( [^"\\] | '\\' any ) * '"' | + "'" ( [^'\\] | '\\' any ) * "'" | + "`" ( [^`\\] | '\\' any ) * "`" + / + + ignore + /[ \t\v\r\n]+/ + end + + def item + [comment] + | [id] + | [number] + | [symbol] + | [string] + | [`{ _IN_ item* _EX_ `} ] + + def crack_out + [_IN_ _EX_ item*] +end + +namespace crack_gen + + global Parser: parser<crack_out::crack_out> + + void tok_list( TL: host::tok* ) + { + for Tok: host::tok in repeat(TL) { + switch Tok + case [host::`${ StmtList: stmt* host::`}$] { + send Parser + "if ( 1 ) { + " [stmt_list( StmtList )] + "} + } + case [host::`={ Expr: expr host::`}=] { + send Parser + "([expr( Expr )])" + } + case [E: escape] { + Str: str = $E + send Parser + "[Str.suffix( 1 )]" + } + default { + send Parser + [Tok] + } + } + } + + void embedded_host( EmbeddedHost: embedded_host ) + { + switch EmbeddedHost + case [`host `( string `, uint `) `={ TL: host::tok* host::`}=] + { + send Parser + "([tok_list( TL )])" + } + case [`host `( string `, uint `) `${ TL: host::tok* host::`}$] + { + send Parser + "if ( 1 ) { + " [tok_list( TL )] + "} + } + case [`host `( string `, uint `) `@{ TL: host::tok* host::`}@] + { + send Parser + [tok_list( TL )] + } + } + + void expr_factor( ExprFactor: expr_factor ) + { + switch ExprFactor + case [EH: 
embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [`( E: expr `)] + { + send Parser + "([expr(E)])" + } + case [I: ident `[ E: expr `]] + { + send Parser + "[I]\[[expr( E )]\]" + } + case [`offset `( ident `, expr `)] + { + send Parser + [expr( ExprFactor.expr )] + } + case [`deref `( ident `, expr `)] + { + send Parser + [ ExprFactor.ident '[' expr( ExprFactor.expr ) ']'] + } + case [`TRUE] + { + send Parser "1" + } + case [`FALSE] + { + send Parser "0" + } + case [N: `nil] + { + send Parser "0" + } + case [Number: number] + { + number( Number ) + } + case [E1: embedded_host `-> E2: expr_factor] + { + # The accessor operator is contained within the lhs. + embedded_host( E1 ) + expr_factor( E2 ) + } + case [`cast `( T: type `) F: expr_factor] + { + send Parser + "[type( T )] ( [expr_factor( F )] )" + } + case [I: ident `[ E: expr `] `. F: ident] { + send Parser + [^I '_' ^F '[' E ']'] + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void lvalue( ExprFactor: lvalue ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr( TL ) C] + } + case [E1: embedded_host `-> E2: lvalue] + { + # The accessor operator is contained within the lhs. + embedded_host( E1 ) + lvalue( E2 ) + } + case [I: ident `[ E: expr `] `. F: ident] { + send Parser + [^I '_' ^F '[' E ']'] + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void expr_factor_op( ExprFactorOp: expr_factor_op ) + { + switch ExprFactorOp + case [B: `! 
expr_factor_op] + { + send Parser [B] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [T: `~ expr_factor_op] + { + send Parser [T] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [expr_factor] + { + expr_factor( ExprFactorOp.expr_factor ) + } + } + + void expr_bitwise( ExprBitwise: expr_bitwise ) + { + switch ExprBitwise + case [expr_bitwise A: `& expr_factor_op] + { + expr_bitwise( ExprBitwise._expr_bitwise ) + send Parser [A] + expr_factor_op( ExprBitwise.expr_factor_op ) + } + case [expr_factor_op] + { + expr_factor_op( ExprBitwise.expr_factor_op ) + } + } + + void expr_mult( ExprMult: expr_mult ) + { + switch ExprMult + case [expr_mult T: `* expr_bitwise] + { + expr_mult( ExprMult._expr_mult ) + send Parser [T] + expr_bitwise( ExprMult.expr_bitwise ) + } + case [expr_bitwise] + { + expr_bitwise( ExprMult.expr_bitwise ) + } + } + + void expr_add( ExprAdd: expr_add ) + { + switch ExprAdd + case [expr_add Op: add_op expr_mult] + { + expr_add( ExprAdd._expr_add ) + send Parser [Op] + expr_mult( ExprAdd.expr_mult ) + } + case [expr_mult] + { + expr_mult( ExprAdd.expr_mult ) + } + } + + void expr_shift( ExprShift: expr_shift ) + { + switch ExprShift + case [expr_shift Op: shift_op expr_add] + { + expr_shift( ExprShift._expr_shift ) + send Parser [Op] + expr_add( ExprShift.expr_add ) + } + case [expr_add] + { + expr_add( ExprShift.expr_add ) + } + } + + void expr_test( ExprTest: expr_test ) + { + switch ExprTest + case [expr_test Op: test_op expr_shift] + { + expr_test( ExprTest._expr_test ) + send Parser [Op] + expr_shift( ExprTest.expr_shift ) + } + case [expr_shift] + { + expr_shift( ExprTest.expr_shift ) + } + } + + void expr( Expr: expr ) + { + expr_test( Expr.expr_test ) + } + + void type( Type: type ) + { + switch Type + case "s8" + send Parser ['int16 '] + case "s16" + send Parser ['int16 '] + case "s32" + send Parser ['int32 '] + case "s64" + send Parser ['int64 '] + case "s128" + send Parser ['long long '] + case "uint" + send Parser 
['uint32 '] + default + send Parser [Type] + } + + void number( Number: number ) + { + switch Number + case [`u `( uint `) ] + send Parser "[Number.uint]u" + case [`c `( uint `) ] + send Parser "[Number.uint]" + default + send Parser [Number] + } + + void num_list( NumList: num_list ) + { + for Number: number in NumList + send Parser "[number( Number )], " + } + + void stmt( Stmt: stmt ) + { + switch Stmt + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [Array: static_array] { + send Parser + "const array\[[type(Array.type)]\] " + "[Array.ident] = \[ [num_list(Array.num_list)] \]; + } + case [Value: static_value] { + send Parser + "const [Value.type] [Value.ident] = [Value.number]; + } + case [ + `if `( IfExpr: expr `) IfStmt: stmt + ElseIfClauseList: else_if_clause* ElseClauseOpt: else_clause? + ] { + send Parser + "if ( [expr(IfExpr)] ) + " [flow_stmt(IfStmt)] + + for ElseIfClause: else_if_clause in repeat( ElseIfClauseList ) { + match ElseIfClause + ['else if (' ElseIfExpr: expr ')' ElseIfStmt: stmt] + + send Parser + "else if ( [expr(ElseIfExpr)] ) + " [flow_stmt(ElseIfStmt)] + } + + if ( match ElseClauseOpt ['else' ElseStmt: stmt] ) { + send Parser + "else + " [flow_stmt(ElseStmt)] + } + } + case [`while `( WhileExpr: expr `) WhileStmt: stmt] { + send Parser + "while ( [expr(WhileExpr)] ) + " [flow_stmt(WhileStmt)] + } + case [`switch `( SwitchExpr: expr `) `{ StmtList: stmt* `}] { + + require StmtList + [`case E1: expr `{ Inner: stmt* `} Rest: stmt*] + + send Parser + "if ( [expr(SwitchExpr)] == [expr(E1)] ) { + " [stmt_list(Inner)] + "} + + for S: stmt in repeat(Rest) { + switch S + case [`case E1: expr `{ Inner: stmt* `}] + { + send Parser + "else if ( [expr(SwitchExpr)] == [expr(E1)] ) { + " [stmt_list(Inner)] + "} + } + case + [`default `{ Inner: stmt* `}] + { + send Parser + "else { + " [stmt_list(Inner)] + "} + } + } + + send Parser + "; + } + case [ExprExpr: expr Semi: `;] { + send Parser + [expr(ExprExpr) Semi] + } + 
case [`{ TL: stmt* `}] { + send Parser + "if ( 1 ) { + " [stmt_list(TL)] + "} + } + case [ + TypeList: opt_const Type: type + Ident: ident OptInit: opt_init Semi: `; + ] + { + send Parser + [TypeList type(Type) Ident] + + if match OptInit [E: `= expr] { + send Parser + [E expr(OptInit.expr)] + } + + send Parser + [Semi] + } + case [Export: export_stmt] + { + send Parser + "#define [Export.ident] [number(Export.number)] + } + case ['fallthrough' ';'] + { + # Nothing needed here. + } + case [Index: index_stmt] + { + send Parser + "int [Index.ident] + + if match Index.opt_init [E: `= expr] { + send Parser + [E expr(Index.opt_init.expr)] + } + else { + send Parser + " = 0" + } + + send Parser "; + } + case [case_block] + { + send Parser + "case [expr( Stmt.case_block.expr )]: + "[stmt_list( Stmt.case_block._repeat_stmt )] + "break; + } + case [default_block] + { + send Parser + "default: + "[stmt_list( Stmt.default_block._repeat_stmt )] + "break; + } + case [case_label] + { + send Parser + "case [expr( Stmt.case_label.expr )]: + } + case [AS: assign_stmt] + { + send Parser + "[lvalue(AS.LValue) AS.assign_op expr(AS.expr)]; + } + default { + # catches unspecified cases + send Parser [Stmt] + } + } + + void flow_stmt( Stmt: stmt ) + { + switch Stmt + case [`{ TL: stmt* `}] { + send Parser + "{ + " [stmt_list(TL)] + "} + } + default { + stmt( Stmt ) + } + } + + void stmt_list( StmtList: stmt* ) + { + for Stmt: stmt in repeat( StmtList ) + stmt( Stmt ) + } + + void trans( Output: stream, Start: start ) + { + Parser = new parser<crack_out::crack_out>() + + stmt_list( Start._repeat_stmt ) + + CO: crack_out::crack_out = Parser->finish() + + if CO { + send Output + [CO] + } + else { + send stderr + "failed to parse output: [Parser->error] + } + } +end + +void trans( Output: stream, Start: start ) +{ + crack_gen::trans( Output, Start ) +} + +include 'rlhc-main.lm' diff --git a/ragel/host-crack/rlparse.lm b/ragel/host-crack/rlparse.lm new file mode 100644 index 
00000000..db7851c7 --- /dev/null +++ b/ragel/host-crack/rlparse.lm @@ -0,0 +1,202 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + typeid<var_ref>, input->pull( match_length ) ) ) + } + else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. + input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end + +end + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? 
number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any +end + +reduction TopLevel + + # Pass Through. + # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? string] :ImportDefStr + # | [ident whitespace? `= whitespace? number] :ImportAssignNum + # | [ident whitespace? `= whitespace? string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( 
$ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + +export RagelError: str + +# File name. The open is expected to succeed. It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. +while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/host-csharp/.gitignore b/ragel/host-csharp/.gitignore new file mode 100644 index 00000000..9065abc4 --- /dev/null +++ b/ragel/host-csharp/.gitignore @@ -0,0 +1,12 @@ +/Makefile +/Makefile.in +/.deps +/.libs +/ragel-csharp +/ragel-csharp.exe +/rlhc.c +/rlparse.c +/rlreduce.cc + +/CMakeFiles +/cmake_install.cmake diff --git a/ragel/host-csharp/CMakeLists.txt b/ragel/host-csharp/CMakeLists.txt new file mode 100644 index 00000000..8d70e5a6 --- /dev/null +++ b/ragel/host-csharp/CMakeLists.txt @@ -0,0 +1,34 @@ +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS rlparse.lm + COMMAND colm::colm + ARGS -I.. -c -b rlparseCSharp + -o "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" VERBATIM) + +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + DEPENDS rlhc.lm + COMMAND colm::colm + ARGS -I.. 
-c -b rlhcCSharp + -o "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + rlhc.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" VERBATIM) + +add_executable(ragel-csharp main.cc + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + +target_link_libraries(ragel-csharp PRIVATE libragel libfsm) + +if(${PROJECT_NAME}_MAKE_INSTALL) + install(TARGETS ragel-csharp + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() diff --git a/ragel/host-csharp/Makefile.am b/ragel/host-csharp/Makefile.am new file mode 100644 index 00000000..56c6933e --- /dev/null +++ b/ragel/host-csharp/Makefile.am @@ -0,0 +1,27 @@ +COLM = @COLM@ +COLM_LD = @COLM_LD@ +COLM_LIBDEP = @COLM_LIBDEP@ +COLM_BINDEP = @COLM_BINDEP@ + +bin_PROGRAMS = ragel-csharp + +ragel_csharp_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_ragel_csharp_SOURCES = main.cc rlhc.c + +nodist_ragel_csharp_SOURCES = \ + rlparse.c rlreduce.cc + +ragel_csharp_LDADD = ../libragel.la ../libfsm.la $(COLM_LD) + +BUILT_SOURCES = rlparse.c rlreduce.cc rlhc.c + +EXTRA_DIST = rlparse.lm rlhc.lm + +rlparse.c: rlparse.lm $(COLM_BINDEP) + $(COLM) -I.. -c -b rlparseCSharp -o $@ -m rlreduce.cc $< + +rlreduce.cc: rlparse.c + +rlhc.c: rlhc.lm + $(COLM) -I.. 
-c -b rlhcCSharp -o $@ $< diff --git a/ragel/host-csharp/main.cc b/ragel/host-csharp/main.cc new file mode 100644 index 00000000..fa5cb5a9 --- /dev/null +++ b/ragel/host-csharp/main.cc @@ -0,0 +1,72 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inputdata.h" + +extern struct colm_sections rlparseCSharp; +extern struct colm_sections rlhcCSharp; + +/* + * C# + */ + +const char *defaultOutFnCSharp( const char *inputFileName ) +{ + const char *ext = findFileExtension( inputFileName ); + if ( ext != 0 && strcmp( ext, ".rh" ) == 0 ) + return fileNameFromStem( inputFileName, ".h" ); + else + return fileNameFromStem( inputFileName, ".cs" ); +} + +HostType hostTypesCSharp[] = +{ + { "sbyte", 0, "sbyte", true, true, false, CHAR_MIN, CHAR_MAX, 0, 0, 1 }, + { "byte", 0, "byte", false, true, false, 0, 0, 0, UCHAR_MAX, 1 }, + { "short", 0, "short", true, true, false, SHRT_MIN, SHRT_MAX, 0, 0, 2 }, + { "ushort", 0, "ushort", false, true, false, 0, 0, 0, USHRT_MAX, 2 }, + { "char", 0, "char", false, true, true, 0, 0, 0, USHRT_MAX, 2 }, + { "int", 0, "int", true, true, false, INT_MIN, INT_MAX, 0, 0, 4 }, + { "uint", 0, "uint", false, true, false, 0, 0, 0, UINT_MAX, 4 }, + { "long", 0, "long", true, true, false, LONG_MIN, LONG_MAX, 0, 0, 8 }, + { "ulong", 0, "ulong", false, true, false, 0, 0, 0, ULONG_MAX, 8 }, +}; + +const HostLang hostLangCSharp = +{ + hostTypesCSharp, + 9, + 4, + true, + false, /* loopLabels */ + Translated, + GotoFeature, + &makeCodeGen, + &defaultOutFnCSharp, + &genLineDirectiveTrans +}; + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangCSharp, &rlparseCSharp, &rlhcCSharp ); + return id.rlhcMain( argc, argv ); +} diff --git a/ragel/host-csharp/rlhc.lm b/ragel/host-csharp/rlhc.lm new file mode 100644 index 00000000..a108a4a8 --- /dev/null +++ b/ragel/host-csharp/rlhc.lm @@ -0,0 +1,477 @@ +include 'ril.lm' + +namespace csharp_out + token _IN_ /''/ + token _EX_ /''/ + + lex + token comment / + '//' any* :> '\n' | + '/*' any* :>> '*/' + / + + token id + /[a-zA-Z_][a-zA-Z_0-9]*/ + + token number / + [0-9]+ + / + + token symbol / + '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | + '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | + '=' | '>' | '?' 
| '@' | '[' | ']' | '^' | '|' | + '~' / + + literal `{ `} + + token string / + '"' ( [^"\\] | '\\' any ) * '"' | + "'" ( [^'\\] | '\\' any ) * "'" + / + + ignore + /[ \t\v\r\n]+/ + end + + def item + [comment] + | [id] + | [number] + | [symbol] + | [string] + | [`{ _IN_ item* _EX_ `} ] + + def csharp_out + [_IN_ _EX_ item*] +end + +namespace csharp_gen + + global Parser: parser<csharp_out::csharp_out> + + void tok_list( TL: host::tok* ) + { + for Tok: host::tok in repeat(TL) { + switch Tok + case [host::`${ StmtList: stmt* host::`}$] + send Parser "{[stmt_list( StmtList )]}" + case [host::`={ Expr: expr host::`}=] + send Parser "([expr( Expr )])" + case [E: escape] { + Str: str = $E + send Parser + "[Str.suffix( 1 )]" + } + default { + send Parser [Tok] + } + } + } + + void embedded_host( EH: embedded_host ) + { + switch EH + case [`host `( string `, uint `) `={ TL: host::tok* host::`}=] + { + send Parser + "([tok_list( TL )])" + } + case [`host `( string `, uint `) `${ TL: host::tok* host::`}$] + { + send Parser + "{[tok_list( TL )]} + } + case [`host `( string `, uint `) `@{ TL: host::tok* host::`}@] + { + send Parser + [tok_list( TL )] + } + } + + void expr_factor( ExprFactor: expr_factor ) + { + switch ExprFactor + case + [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [O:`( TL: expr C: `)] + { + send Parser + [O expr(TL) C] + } + case [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr( TL ) C] + } + case ['offset' '(' ident ',' expr ')'] + { + send Parser + [expr( ExprFactor.expr )] + } + case ['deref' '(' ident ',' expr ')'] + { + send Parser + [ ExprFactor.ident '[' expr( ExprFactor.expr ) ']'] + } + case [T: `TRUE] + { + T.data = 'true' + send Parser [T] + } + case [F: `FALSE] + { + F.data = 'false' + send Parser [F] + } + case [N: `nil] + { + N.data = '0' + send Parser [N] + } + case [Number: number] + { + number( Number ) + } + case [E1: embedded_host `-> E2: expr_factor] + { + embedded_host( E1 ) + 
expr_factor( E2 ) + } + case [`cast Open: `( Type: type Close: `) expr_factor] + { + send Parser [Open] + type( Type ) + send Parser [Close] + expr_factor( ExprFactor._expr_factor ) + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void lvalue( ExprFactor: lvalue ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr( TL ) C] + } + case [E1: embedded_host `-> E2: lvalue] + { + embedded_host( E1 ) + lvalue( E2 ) + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void expr_factor_op( ExprFactorOp: expr_factor_op ) + { + switch ExprFactorOp + case [B: `! expr_factor_op] + { + send Parser [B] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [T: `~ expr_factor_op] + { + send Parser [T] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [expr_factor] + { + expr_factor( ExprFactorOp.expr_factor ) + } + } + + void expr_bitwise( ExprBitwise: expr_bitwise ) + { + switch ExprBitwise + case [expr_bitwise A: `& expr_factor_op] + { + expr_bitwise( ExprBitwise._expr_bitwise ) + send Parser [A] + expr_factor_op( ExprBitwise.expr_factor_op ) + } + case [expr_factor_op] + { + expr_factor_op( ExprBitwise.expr_factor_op ) + } + } + + void expr_mult( ExprMult: expr_mult ) + { + switch ExprMult + case [expr_mult T: `* expr_bitwise] + { + expr_mult( ExprMult._expr_mult ) + send Parser [T] + expr_bitwise( ExprMult.expr_bitwise ) + } + case [expr_bitwise] + { + expr_bitwise( ExprMult.expr_bitwise ) + } + } + + void expr_add( ExprAdd: expr_add ) + { + switch ExprAdd + case [expr_add Op: add_op expr_mult] + { + expr_add( ExprAdd._expr_add ) + send Parser [Op] + expr_mult( ExprAdd.expr_mult ) + } + case [expr_mult] + { + expr_mult( ExprAdd.expr_mult ) + } + } + + void expr_shift( ExprShift: expr_shift ) + { + switch ExprShift + case [expr_shift Op: shift_op expr_add] + { + 
expr_shift( ExprShift._expr_shift ) + send Parser [Op] + expr_add( ExprShift.expr_add ) + } + case [expr_add] + { + expr_add( ExprShift.expr_add ) + } + } + + void expr_test( ExprTest: expr_test ) + { + switch ExprTest + case [expr_test Op: test_op expr_shift] + { + expr_test( ExprTest._expr_test ) + send Parser [Op] + expr_shift( ExprTest.expr_shift ) + } + case [expr_shift] + { + expr_shift( ExprTest.expr_shift ) + } + } + + void expr( Expr: expr ) + { + expr_test( Expr.expr_test ) + } + + void type( Type: type ) + { + switch Type + case "s8" + send Parser ['sbyte '] + case "s16" + send Parser ['short '] + case "s32" + send Parser ['int '] + case "s64" + send Parser ['long '] + case "s128" + send Parser ['long long '] + case "uint" + send Parser ['uint '] + default + send Parser [Type] + } + + void number( Number: number ) + { + switch Number + case [`c `( uint `) ] { + Str: str = $Number.uint + send Parser "'\\u[sprintf( "%04x", Str.atoi() )]'" + } + case [`u `( uint `) ] { + send Parser [$Number.uint] + } + default { + send Parser [$Number.sint] + } + } + + void num_list( NumList: num_list ) + { + for Number: number in NumList + send Parser "[number( Number )], " + } + + void stmt( Stmt: stmt ) + { + switch Stmt + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [A: static_array] { + send Parser + "static readonly [type(A.type)] \[\]" + "[A.ident] = { [num_list( A.num_list )] }; + } + case [V: static_value] { + send Parser + "static readonly [V.type] [V.ident] = [V.number]; + } + case [ + 'if' O: `( IfExpr: expr C: `) IfStmt: stmt + ElseIfClauseList: else_if_clause* ElseClauseOpt: else_clause? 
+ ] { + send Parser + "if ( [expr(IfExpr)] ) + " [stmt(IfStmt)] + + for ElseIfClause: else_if_clause in repeat( ElseIfClauseList ) { + match ElseIfClause + ['else if (' ElseIfExpr: expr ')' ElseIfStmt: stmt] + + send Parser + "else if ( [expr(ElseIfExpr)] ) + " [stmt(ElseIfStmt)] + } + + if ( match ElseClauseOpt ['else' ElseStmt: stmt] ) { + send Parser + "else + " [stmt(ElseStmt)] + } + } + case ['while' '(' WhileExpr: expr ')' WhileStmt: stmt] { + send Parser + "while ( [expr(WhileExpr)] ) + " [stmt(WhileStmt)] + } + case ['switch' '(' SwitchExpr: expr ')' '{' StmtList: stmt* '}'] { + send Parser + "switch ( [expr(SwitchExpr)] ) { + " [stmt_list(StmtList)] + "} + } + case [ExprExpr: expr Semi: `;] { + send Parser + [expr(ExprExpr) Semi] + } + case [L: `{ TL: stmt* R: `}] { + send Parser + [L stmt_list(TL) R] + } + case [ + TypeList: opt_const Type: type Ident: ident + OptInit: opt_init Semi: `; + ] + { + send Parser + [TypeList type(Type) Ident] + + if match OptInit [E: `= expr] { + send Parser + [E expr(OptInit.expr)] + } + + send Parser + [Semi] + } + case [Export: export_stmt] + { + send Parser + "#define [Export.ident] [Export.number] + } + case ['fallthrough' ';'] + { + # Nothing needed here. 
+ } + case [Index: index_stmt] + { + send Parser + "int [Index.ident]" + + if match Index.opt_init [E: `= expr] { + send Parser + [E expr(Index.opt_init.expr)] + } + + send Parser ";" + } + case [case_block] + { + send Parser + "case [expr( Stmt.case_block.expr )]: + "[stmt_list( Stmt.case_block._repeat_stmt )] + "break; + } + case [default_block] + { + send Parser + "default: + "[stmt_list( Stmt.default_block._repeat_stmt )] + "break; + } + case [case_label] + { + send Parser + "case [expr( Stmt.case_label.expr )]: + } + case [AS: assign_stmt] + { + send Parser + "[lvalue(AS.LValue) AS.assign_op expr(AS.expr)]; + } + default { + # catches unspecified cases + send Parser [Stmt] + } + } + + void stmt_list( StmtList: stmt* ) + { + for Stmt: stmt in repeat( StmtList ) + stmt( Stmt ) + } + + void trans( Output: stream, Start: start ) + { + Parser = new parser<csharp_out::csharp_out>() + + stmt_list( Start._repeat_stmt ) + + CSO: csharp_out::csharp_out = Parser->finish() + + if CSO { + send Output + [CSO] + } + else { + send stderr + "failed to parse output: [Parser->error] + } + } +end + +void trans( Output: stream, Start: start ) +{ + csharp_gen::trans( Output, Start ) +} + +include 'rlhc-main.lm' diff --git a/ragel/host-csharp/rlparse.lm b/ragel/host-csharp/rlparse.lm new file mode 100644 index 00000000..db7851c7 --- /dev/null +++ b/ragel/host-csharp/rlparse.lm @@ -0,0 +1,202 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + 
typeid<var_ref>, input->pull( match_length ) ) ) + } + else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. + input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end + +end + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any +end + +reduction TopLevel + + # Pass Through. + # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? 
string] :ImportDefStr + # | [ident whitespace? `= whitespace? number] :ImportAssignNum + # | [ident whitespace? `= whitespace? string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + +export RagelError: str + +# File name. The open is expected to succeed. It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. 
+while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/host-d/.gitignore b/ragel/host-d/.gitignore new file mode 100644 index 00000000..fd73937b --- /dev/null +++ b/ragel/host-d/.gitignore @@ -0,0 +1,12 @@ +/Makefile +/Makefile.in +/.deps +/.libs +/ragel-d +/ragel-d.exe +/rlhc.c +/rlparse.c +/rlreduce.cc + +/CMakeFiles +/cmake_install.cmake diff --git a/ragel/host-d/CMakeLists.txt b/ragel/host-d/CMakeLists.txt new file mode 100644 index 00000000..c302fabc --- /dev/null +++ b/ragel/host-d/CMakeLists.txt @@ -0,0 +1,34 @@ +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS rlparse.lm + COMMAND colm::colm + ARGS -I.. -c -b rlparseD + -o "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + DEPENDS rlhc.lm + COMMAND colm::colm + ARGS -I.. 
-c -b rlhcD + -o "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + rlhc.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_executable(ragel-d main.cc + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + +target_link_libraries(ragel-d PRIVATE libragel libfsm) + +if(${PROJECT_NAME}_MAKE_INSTALL) + install(TARGETS ragel-d + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() diff --git a/ragel/host-d/Makefile.am b/ragel/host-d/Makefile.am new file mode 100644 index 00000000..ab9a0af8 --- /dev/null +++ b/ragel/host-d/Makefile.am @@ -0,0 +1,27 @@ +COLM = @COLM@ +COLM_LD = @COLM_LD@ +COLM_LIBDEP = @COLM_LIBDEP@ +COLM_BINDEP = @COLM_BINDEP@ + +bin_PROGRAMS = ragel-d + +ragel_d_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_ragel_d_SOURCES = main.cc rlhc.c + +nodist_ragel_d_SOURCES = \ + rlparse.c rlreduce.cc + +ragel_d_LDADD = ../libragel.la ../libfsm.la $(COLM_LD) + +BUILT_SOURCES = rlparse.c rlreduce.cc rlhc.c + +EXTRA_DIST = rlparse.lm rlhc.lm + +rlparse.c: rlparse.lm $(COLM_BINDEP) + $(COLM) -I.. -c -b rlparseD -o $@ -m rlreduce.cc $< + +rlreduce.cc: rlparse.c + +rlhc.c: rlhc.lm + $(COLM) -I.. 
-c -b rlhcD -o $@ $< diff --git a/ragel/host-d/main.cc b/ragel/host-d/main.cc new file mode 100644 index 00000000..f6eb7cbe --- /dev/null +++ b/ragel/host-d/main.cc @@ -0,0 +1,72 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inputdata.h" + +extern struct colm_sections rlparseD; +extern struct colm_sections rlhcD; + +/* + * D + */ + +const char *defaultOutFnD( const char *inputFileName ) +{ + const char *ext = findFileExtension( inputFileName ); + if ( ext != 0 && strcmp( ext, ".rh" ) == 0 ) + return fileNameFromStem( inputFileName, ".h" ); + else + return fileNameFromStem( inputFileName, ".d" ); +} + +HostType hostTypesD[] = +{ + { "byte", 0, "byte", true, true, false, CHAR_MIN, CHAR_MAX, 0, 0, 1 }, + { "ubyte", 0, "ubyte", false, true, false, 0, 0, 0, UCHAR_MAX, 1 }, + { "char", 0, "char", false, true, false, 0, 0, 0, UCHAR_MAX, 1 }, + { "short", 0, "short", true, true, false, SHRT_MIN, SHRT_MAX, 0, 0, 2 }, + { "ushort", 0, "ushort", false, true, false, 0, 0, 0, USHRT_MAX, 2 }, + { "wchar", 0, "wchar", false, true, false, 0, 0, 0, USHRT_MAX, 2 }, + { "int", 0, "int", true, true, false, INT_MIN, INT_MAX, 0, 0, 4 }, + { "uint", 0, "uint", false, true, false, 0, 0, 0, UINT_MAX, 4 }, + { "dchar", 0, "dchar", false, true, false, 0, 0, 0, UINT_MAX, 4 }, +}; + +const HostLang hostLangD = +{ + hostTypesD, + 9, + 2, + true, + false, /* loopLabels */ + Translated, + GotoFeature, + &makeCodeGen, + &defaultOutFnD, + &genLineDirectiveTrans +}; + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangD, &rlparseD, &rlhcD ); + return id.rlhcMain( argc, argv ); +} diff --git a/ragel/host-d/rlhc.lm b/ragel/host-d/rlhc.lm new file mode 100644 index 00000000..14f2f192 --- /dev/null +++ b/ragel/host-d/rlhc.lm @@ -0,0 +1,508 @@ +include 'ril.lm' + +namespace d_out + token _IN_ /''/ + token _EX_ /''/ + + lex + token comment / + '//' any* :> '\n' | + '/*' any* :>> '*/' + / + + token id + /[a-zA-Z_][a-zA-Z_0-9]*/ + + token number / + [0-9]+ + / + + token symbol / + '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | + '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | + '=' | '>' | '?' 
| '@' | '[' | ']' | '^' | '|' | + '~' / + + literal `{ `} + + token string / + '"' ( [^"\\] | '\\' any ) * '"' | + "'" ( [^'\\] | '\\' any ) * "'" + / + + ignore + /[ \t\v\r\n]+/ + end + + def item + [comment] + | [id] + | [number] + | [symbol] + | [string] + | [`{ _IN_ item* _EX_ `} ] + + def d_out + [_IN_ _EX_ item*] +end + +namespace d_gen + + global Parser: parser<d_out::d_out> + + global HasDefault: list<int> = new list<int>() + + void pushHasDef( H: int ) + { + HasDefault->push( H ) + } + + int popHasDef() + { + return HasDefault->pop() + } + + void tok_list( TL: host::tok* ) + { + for Tok: host::tok in repeat(TL) { + switch Tok + case [host::`${ StmtList: stmt* host::`}$] + send Parser "{[stmt_list( StmtList )]}" + case [host::`={ Expr: expr host::`}=] + send Parser "([expr( Expr )])" + case [E: escape] { + Str: str = $E + send Parser + "[Str.suffix( 1 )]" + } + default { + send Parser [Tok] + } + } + } + + void embedded_host( EmbeddedHost: embedded_host ) + { + switch EmbeddedHost + case [`host `( string `, uint `) `={ TL: host::tok* host::`}=] + { + send Parser + "([tok_list( TL )])" + } + case [`host `( string `, uint `) `${ TL: host::tok* host::`}$] + { + send Parser + "{[tok_list( TL )]} + } + case [`host `( string `, uint `) `@{ TL: host::tok* host::`}@] + { + send Parser + [tok_list( TL )] + } + } + + void expr_factor( ExprFactor: expr_factor ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host(EH)] + } + case + [O:`( TL: expr C: `)] + { + send Parser + [O expr( TL ) C] + } + case + [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr(TL) C] + } + case + [`offset `( ident `, expr `)] + { + send Parser + "& [ExprFactor.ident] \[ [expr(ExprFactor.expr)] \] + } + case + [`deref `( ident `, expr `)] + { + send Parser + "(*( [expr(ExprFactor.expr)] )) + } + case + [T: `TRUE] + { + T.data = '1' + send Parser [T] + } + case + [F: `FALSE] + { + F.data = '0' + send Parser [F] + } + case + [N: `nil] + { + 
N.data = 'null' + send Parser [N] + } + case + [Number: number] + { + number( Number ) + } + case [E1: embedded_host `-> E2: expr_factor] + { + embedded_host( E1 ) + expr_factor( E2 ) + } + case [`cast Open: `( Type: type Close: `) expr_factor] + { + send Parser ['cast' Open] + type( Type ) + send Parser [Close] + expr_factor( ExprFactor._expr_factor ) + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void lvalue( ExprFactor: lvalue ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host(EH)] + } + case + [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr(TL) C] + } + case [E1: embedded_host `-> E2: lvalue] + { + embedded_host( E1 ) + lvalue( E2 ) + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void expr_factor_op( ExprFactorOp: expr_factor_op ) + { + switch ExprFactorOp + case [B: `! expr_factor_op] + { + send Parser [B] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [T: `~ expr_factor_op] + { + send Parser [T] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [expr_factor] + { + expr_factor( ExprFactorOp.expr_factor ) + } + } + + void expr_bitwise( ExprBitwise: expr_bitwise ) + { + switch ExprBitwise + case [expr_bitwise A: `& expr_factor_op] + { + expr_bitwise( ExprBitwise._expr_bitwise ) + send Parser [A] + expr_factor_op( ExprBitwise.expr_factor_op ) + } + case [expr_factor_op] + { + expr_factor_op( ExprBitwise.expr_factor_op ) + } + } + + void expr_mult( ExprMult: expr_mult ) + { + switch ExprMult + case [expr_mult T: `* expr_bitwise] + { + expr_mult( ExprMult._expr_mult ) + send Parser [T] + expr_bitwise( ExprMult.expr_bitwise ) + } + case [expr_bitwise] + { + expr_bitwise( ExprMult.expr_bitwise ) + } + } + + void expr_add( ExprAdd: expr_add ) + { + switch ExprAdd + case [expr_add Op: add_op expr_mult] + { + expr_add( ExprAdd._expr_add ) + send Parser [Op] + expr_mult( ExprAdd.expr_mult ) + } + case 
[expr_mult] + { + expr_mult( ExprAdd.expr_mult ) + } + } + + void expr_shift( ExprShift: expr_shift ) + { + switch ExprShift + case [expr_shift Op: shift_op expr_add] + { + expr_shift( ExprShift._expr_shift ) + send Parser [Op] + expr_add( ExprShift.expr_add ) + } + case [expr_add] + { + expr_add( ExprShift.expr_add ) + } + } + + void expr_test( ExprTest: expr_test ) + { + switch ExprTest + case [expr_test Op: test_op expr_shift] + { + expr_test( ExprTest._expr_test ) + send Parser [Op] + expr_shift( ExprTest.expr_shift ) + } + case [expr_shift] + { + expr_shift( ExprTest.expr_shift ) + } + } + + void expr( Expr: expr ) + { + expr_test( Expr.expr_test ) + } + + void type( Type: type ) + { + switch Type + case "s8" + send Parser ['byte '] + case "s16" + send Parser ['short '] + case "s32" + send Parser ['int '] + case "s64" + send Parser ['long '] + case "s128" + send Parser ['long long '] + default + send Parser [Type] + } + + void number( Number: number ) + { + switch Number + case [`u `( uint `) ] + send Parser "[Number.uint]u" + default + send Parser [Number] + } + + void num_list( NumList: num_list ) + { + for Number: number in NumList + send Parser "[number( Number )], " + } + + void stmt( Stmt: stmt ) + { + switch Stmt + case [EH: embedded_host] + { + send Parser + [embedded_host(EH)] + } + case [A: static_array] { + send Parser + "static const [type(A.type)]\[\] " + "[A.ident] = \[ [num_list( A.num_list )] \]; + } + case [V: static_value] { + send Parser + "static const [V.type] [V.ident] = [V.number]; + } + case [ + `if `( IfExpr: expr `) IfStmt: stmt + ElseIfClauseList: else_if_clause* ElseClauseOpt: else_clause? 
+ ] { + send Parser + "if ( [expr(IfExpr)] ) + " [stmt(IfStmt)] + + for ElseIfClause: else_if_clause in repeat( ElseIfClauseList ) { + match ElseIfClause + [`else `if `( ElseIfExpr: expr `) ElseIfStmt: stmt] + + send Parser + "else if ( [expr(ElseIfExpr)] ) + " [stmt(ElseIfStmt)] + } + + if ( match ElseClauseOpt ['else' ElseStmt: stmt] ) { + send Parser + "else + " [stmt(ElseStmt)] + } + } + case [`while `( WhileExpr: expr `) WhileStmt: stmt] { + send Parser + "while ( [expr(WhileExpr)] ) + " [stmt(WhileStmt)] + } + case [`switch `( SwitchExpr: expr `) `{ StmtList: stmt* `}] { + pushHasDef( 0 ) + + send Parser + "switch ( [expr(SwitchExpr)] ) { + " [stmt_list(StmtList)] + + if ( popHasDef() == 0 ) { + send Parser + " default: break; + } + + send Parser + "} + } + case [ExprExpr: expr Semi: `;] { + send Parser + [expr( ExprExpr ) Semi] + } + case [L: `{ TL: stmt* R: `}] { + send Parser + [L stmt_list( TL ) R] + } + case [ + OptConst: opt_const Type: type + Ident: ident OptInit: opt_init Semi: `; + ] + { + if match OptConst ['const'] { + send Parser + "const( [type( Type )] ) + } + else { + type( Type ) + } + + send Parser [Ident] + + if match OptInit [E: `= expr] { + send Parser + [E expr(OptInit.expr)] + } + + send Parser [Semi] + } + case [case_block] + { + send Parser + "case [expr( Stmt.case_block.expr )]: + "[stmt_list( Stmt.case_block._repeat_stmt )] + "break; + } + case [default_block] + { + send Parser + "default: + "[stmt_list( Stmt.default_block._repeat_stmt )] + "break; + + popHasDef() + pushHasDef( 1 ) + } + case [case_label] + { + send Parser + "case [expr( Stmt.case_label.expr )]: + } + case [export_stmt] + { + send Parser + "static const [type(Stmt.export_stmt.type)] " + "[Stmt.export_stmt.ident] = [number(Stmt.export_stmt.number)]; + } + case ['fallthrough' ';'] + { + send Parser "goto case;" + } + case [Index: index_stmt] + { + send Parser + "const([type(Index.type)]) *[Index.ident]" + + if match Index.opt_init [E: `= expr] { + send Parser + [E 
expr(Index.opt_init.expr)] + } + + send Parser "; + } + case [AS: assign_stmt] + { + send Parser + "[lvalue(AS.LValue) AS.assign_op expr(AS.expr)]; + } + default { + # catches unspecified cases + send Parser [Stmt] + } + } + + void stmt_list( StmtList: stmt* ) + { + for Stmt: stmt in repeat( StmtList ) + stmt( Stmt ) + } + + void trans( Output: stream, Start: start ) + { + Parser = new parser<d_out::d_out>() + + stmt_list( Start._repeat_stmt ) + + DO: d_out::d_out = Parser->finish() + + if DO { + send Output + [DO] + } + else { + send stderr + "failed to parse output: [Parser->error] + } + } +end + +void trans( Output: stream, Start: start ) +{ + d_gen::trans( Output, Start ) +} + +include 'rlhc-main.lm' diff --git a/ragel/host-d/rlparse.lm b/ragel/host-d/rlparse.lm new file mode 100644 index 00000000..ddc5157b --- /dev/null +++ b/ragel/host-d/rlparse.lm @@ -0,0 +1,211 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + typeid<var_ref>, input->pull( match_length ) ) ) + } + else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. 
+ input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end + +end + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any +end + +reduction TopLevel + + # Pass Through. + # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? string] :ImportDefStr + # | [ident whitespace? `= whitespace? number] :ImportAssignNum + # | [ident whitespace? `= whitespace? 
string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + else + { + if ( includeDepth == 0 ) { + id->curItem->data.write( "define ", 7 ); + id->curItem->data.write( $ident->data, $ident->length ); + id->curItem->data.write( " ", 1 ); + id->curItem->data.write( $number->data, $number->length ); + } + } + } + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + +export RagelError: str + +# File name. The open is expected to succeed. It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. 
+while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/host-go/.gitignore b/ragel/host-go/.gitignore new file mode 100644 index 00000000..41aff77f --- /dev/null +++ b/ragel/host-go/.gitignore @@ -0,0 +1,12 @@ +/Makefile +/Makefile.in +/.deps +/.libs +/ragel-go +/ragel-go.exe +/rlhc.c +/rlparse.c +/rlreduce.cc + +/CMakeFiles +/cmake_install.cmake diff --git a/ragel/host-go/CMakeLists.txt b/ragel/host-go/CMakeLists.txt new file mode 100644 index 00000000..2ae44311 --- /dev/null +++ b/ragel/host-go/CMakeLists.txt @@ -0,0 +1,34 @@ +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS rlparse.lm + COMMAND colm::colm + ARGS -I.. -c -b rlparseGo + -o "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + DEPENDS rlhc.lm + COMMAND colm::colm + ARGS -I.. 
-c -b rlhcGo + -o "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + rlhc.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_executable(ragel-go main.cc + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + +target_link_libraries(ragel-go PRIVATE libragel libfsm) + +if(${PROJECT_NAME}_MAKE_INSTALL) + install(TARGETS ragel-go + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() diff --git a/ragel/host-go/Makefile.am b/ragel/host-go/Makefile.am new file mode 100644 index 00000000..2a4b67fe --- /dev/null +++ b/ragel/host-go/Makefile.am @@ -0,0 +1,29 @@ +COLM = @COLM@ +COLM_LD = @COLM_LD@ +COLM_LIBDEP = @COLM_LIBDEP@ +COLM_BINDEP = @COLM_BINDEP@ + +bin_PROGRAMS = ragel-go + +data_DATA = out-go.lm + +ragel_go_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_ragel_go_SOURCES = main.cc rlhc.c + +nodist_ragel_go_SOURCES = \ + rlparse.c rlreduce.cc + +ragel_go_LDADD = ../libragel.la ../libfsm.la $(COLM_LD) + +BUILT_SOURCES = rlparse.c rlreduce.cc rlhc.c + +EXTRA_DIST = rlparse.lm rlhc.lm + +rlparse.c: rlparse.lm $(COLM_BINDEP) + $(COLM) -I.. -c -b rlparseGo -o $@ -m rlreduce.cc $< + +rlreduce.cc: rlparse.c + +rlhc.c: rlhc.lm + $(COLM) -I.. 
-c -b rlhcGo -o $@ $< diff --git a/ragel/host-go/main.cc b/ragel/host-go/main.cc new file mode 100644 index 00000000..99fb50d7 --- /dev/null +++ b/ragel/host-go/main.cc @@ -0,0 +1,70 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inputdata.h" + +extern struct colm_sections rlparseGo; +extern struct colm_sections rlhcGo; + +/* + * Go + */ + +const char *defaultOutFnGo( const char *inputFileName ) +{ + return fileNameFromStem( inputFileName, ".go" ); +} + +HostType hostTypesGo[] = +{ + { "byte", 0, "uint8", false, true, false, 0, 0, U8BIT_MIN, U8BIT_MAX, 1 }, + { "int8", 0, "int8", true, true, false, S8BIT_MIN, S8BIT_MAX, 0, 0, 1 }, + { "uint8", 0, "uint8", false, true, false, 0, 0, U8BIT_MIN, U8BIT_MAX, 1 }, + { "int16", 0, "int16", true, true, false, S16BIT_MIN, S16BIT_MAX, 0, 0, 2 }, + { "uint16", 0, "uint16", false, true, false, 0, 0, U16BIT_MIN, U16BIT_MAX, 2 }, + { "int32", 0, "int32", true, true, false, S32BIT_MIN, S32BIT_MAX, 0, 0, 4 }, + { "uint32", 0, "uint32", false, true, false, 0, 0, U32BIT_MIN, U32BIT_MAX, 4 }, + { "int64", 0, "int64", true, true, false, S64BIT_MIN, S64BIT_MAX, 0, 0, 8 }, + { "uint64", 0, "uint64", false, true, false, 0, 0, U64BIT_MIN, U64BIT_MAX, 8 }, + { "rune", 0, "int32", true, true, true, S32BIT_MIN, S32BIT_MAX, 0, 0, 4 }, +}; + +const HostLang hostLangGo = +{ + hostTypesGo, + 10, + 0, + false, + false, /* loopLabels */ + Translated, + GotoFeature, + &makeCodeGen, + &defaultOutFnGo, + &genLineDirectiveTrans +}; + + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangGo, &rlparseGo, &rlhcGo ); + return id.rlhcMain( argc, argv ); +} diff --git a/ragel/host-go/out-go.lm b/ragel/host-go/out-go.lm new file mode 100644 index 00000000..63d2731b --- /dev/null +++ b/ragel/host-go/out-go.lm @@ -0,0 +1,47 @@ +namespace out_go + token _IN_ /''/ + token _EX_ /''/ + + lex + token comment / + '//' any* :> '\n' | + '/*' any* :>> '*/' + / + + token id + /[a-zA-Z_][a-zA-Z_0-9]*/ + + token number / + [0-9]+ + / + + token symbol / + '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | + '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | + '=' | '>' | '?' 
| '@' | '[' | ']' | '^' | '|' | + '~' / + + literal `{ `} + + token string / + '"' ( [^"\\] | '\\' any ) * '"' | + "'" ( [^'\\] | '\\' any ) * "'" + / + + ignore + /[ \t\v\r\n]+/ + end + + def item + [comment] + | [id] + | [number] + | [symbol] + | [string] + | [`{ _IN_ item* _EX_ `} ] + + def out_go + [_IN_ _EX_ item*] +end + + diff --git a/ragel/host-go/rlhc.lm b/ragel/host-go/rlhc.lm new file mode 100644 index 00000000..28eb0e90 --- /dev/null +++ b/ragel/host-go/rlhc.lm @@ -0,0 +1,404 @@ +include 'ril.lm' + +include 'out-go.lm' + +namespace go_gen + + global _: parser<out_go::out_go> + + void tok_list( TL: host::tok* ) + { + for Tok: host::tok in repeat(TL) { + switch Tok + case Stmt { + "{ + stmt_list( StmtList ) + "} + } + case Expr { + "([expr( Expr )])" + } + case Escape { + Str: str = $Tok + "[Str.suffix( 1 )]" + } + default { + [Tok] + } + } + } + + void embedded_host( EmbeddedHost: embedded_host ) + { + switch EmbeddedHost + case Expr + { + ['(' tok_list( TL ) ')'] + } + case Stmt + { + ['{' tok_list( TL ) '}\n'] + } + case Bare + { + [tok_list( TL )] + } + } + + void expr_factor( ExprFactor: expr_factor ) + { + switch ExprFactor + case EmbeddedHost + { + [embedded_host(embedded_host)] + } + case Paren + { + ['( ' expr(expr) ' )'] + } + case ArraySub + { + [ident '[ ' expr( expr ) ' ]'] + } + case Offset + { + "int([expr(expr )]) + } + case Deref + { + [ident '[ ' expr( expr ) ' ]' ] + } + case True + { + "true" + } + case False + { + "false" + } + case Nil + { + "0" + } + case Access + { + embedded_host(embedded_host) + expr_factor(_expr_factor) + } + case Cast + { + [type(type) '( ' expr_factor(_expr_factor) ' )' ] + } + default { + # Catches cases not specified + [ExprFactor] + } + } + + void lvalue( ExprFactor: lvalue ) + { + switch ExprFactor + case [EH: embedded_host] + { + [embedded_host(EH)] + } + case [ident `[ TL: expr `]] + { + [ident '[' expr( TL ) ']'] + } + case [E1: embedded_host `-> E2: lvalue] + { + embedded_host( E1 ) + lvalue( E2 ) + } 
+ default { + # Catches cases not specified + [ExprFactor] + } + } + + void expr_factor_op( ExprFactorOp: expr_factor_op ) + { + switch ExprFactorOp + case [B: `! expr_factor_op] + { + ['! ' expr_factor_op( _expr_factor_op )] + } + case [T: `~ expr_factor_op] + { + ['^ ' expr_factor_op( _expr_factor_op )] + } + case [expr_factor] + { + [expr_factor( ExprFactorOp.expr_factor )] + } + } + + void expr_bitwise( ExprBitwise: expr_bitwise ) + { + switch ExprBitwise + case [expr_bitwise A: `& expr_factor_op] + { + [expr_bitwise( _expr_bitwise ) ' & ' expr_factor_op( expr_factor_op )] + } + case [expr_factor_op] + { + [expr_factor_op( ExprBitwise.expr_factor_op )] + } + } + + void expr_mult( ExprMult: expr_mult ) + { + switch ExprMult + case [expr_mult T: `* expr_bitwise] + { + [expr_mult( _expr_mult ) ' * ' expr_bitwise( expr_bitwise )] + } + case [expr_bitwise] + { + [expr_bitwise( expr_bitwise )] + } + } + + void expr_add( ExprAdd: expr_add ) + { + switch ExprAdd + case [expr_add Op: add_op expr_mult] + { + [expr_add( _expr_add ) ' ' Op ' ' expr_mult( expr_mult )] + } + case [expr_mult] + { + [expr_mult( ExprAdd.expr_mult )] + } + } + + void expr_shift( ExprShift: expr_shift ) + { + switch ExprShift + case [expr_shift Op: shift_op expr_add] + { + [expr_shift( _expr_shift ) ' ' Op ' ' expr_add( expr_add )] + } + case [expr_add] + { + [expr_add( expr_add )] + } + } + + void expr_test( ExprTest: expr_test ) + { + switch ExprTest + case [expr_test Op: test_op expr_shift] + { + [expr_test( _expr_test ) ' ' Op ' ' expr_shift( expr_shift )] + } + case [expr_shift] + { + [expr_shift( ExprTest.expr_shift )] + } + } + + void expr( Expr: expr ) + { + expr_test( Expr.expr_test ) + } + + void type( Type: type ) + { + switch Type + case S8 + ['int8'] + case S16 + ['int16'] + case S32 + ['int32'] + case S64 + ['int64'] + case S128 + ['long long'] + default + [Type] + } + + void number( Number: number ) + { + switch Number + case Unsigned + [uint] + default + [Number] + } + + void 
num_list( NumList: num_list ) + { + number( NumList.number ) + for CommaNum: comma_num in NumList { + [', ' number( CommaNum.number )] + } + } + + # Go must have {} around if and for statements. We strip any blocks from + # these statments and force our own. + void strip_block_stmt( Stmt: stmt ) + { + if match Stmt [`{ StmtList: stmt* `}] + stmt_list(StmtList) + else + stmt( Stmt ) + } + + void stmt( Stmt: stmt ) + { + switch Stmt + case [EH: embedded_host] + { + [embedded_host(EH)] + } + case [A: static_array] { + "var [A.ident] = \[\] " + "[type(A.type)] { [num_list(A.num_list)] } + } + case [V: static_value] { + "var [V.ident] [type(V.type)] = [V.number] + "var _ = [V.ident] + } + case [ + 'if' O: `( IfExpr: expr C: `) IfStmt: stmt + ] { + # if-statements with only the if clause can go out as an if. + "if [expr(IfExpr)] { + strip_block_stmt(IfStmt) + "} + } + case [ + 'if' O: `( IfExpr: expr C: `) IfStmt: stmt + ElseIfClauseList: else_if_clause* ElseClauseOpt: else_clause? + ] { + # If the if-statement has more than just an if clause it goes out as a switch. 
+ "if [expr( IfExpr )] { + strip_block_stmt( IfStmt ) + + for ElseIfClause: else_if_clause in repeat( ElseIfClauseList ) { + match ElseIfClause + ['else if (' ElseIfExpr: expr ')' ElseIfStmt: stmt] + + "} else if [expr(ElseIfExpr)] { + strip_block_stmt(ElseIfStmt) + } + + if ( match ElseClauseOpt ['else' ElseStmt: stmt] ) { + "} else { + strip_block_stmt(ElseStmt) + } + + "} + } + case ["while ( TRUE )" WhileStmt: stmt] { + "for { + strip_block_stmt(WhileStmt) + "} + } + case ['while' '(' WhileExpr: expr ')' WhileStmt: stmt] { + "for [expr(WhileExpr)] { + strip_block_stmt(WhileStmt) + "} + } + case ['switch' '(' SwitchExpr: expr ')' '{' StmtList: stmt* '}'] { + "switch [expr(SwitchExpr)] { + stmt_list(StmtList) + "} + } + case [ExprExpr: expr `;] { + [expr(ExprExpr) ';'] + } + case [B: block] { + "{ + stmt_list(B.StmtList) + "} + } + case [D: declaration] + { + "var [D.ident] [type(D.type)]" + + if match D.opt_init ['=' Expr: expr] { + " = [expr(Expr)]" + } + ['\n'] + } + case [ES: export_stmt] + { + "#define [ES.ident] [number(ES.number)] + } + case [fallthrough] + { + "fallthrough + } + case [Index: index_stmt] + { + "var [Index.ident] int" + + if match Index.opt_init ['=' Expr: expr] { + " = [expr(Expr)]" + } + + ['\n'] + } + case [CB: case_block] + { + "case [expr( CB.expr )]: + stmt_list( CB._repeat_stmt ) + } + case [DB: default_block] + { + "default: + stmt_list( DB._repeat_stmt ) + } + case [CL: case_label] + { + "case [expr( CL.expr )]: + } + case [AS: assign_stmt] + { + "[lvalue(AS.LValue) AS.assign_op expr(AS.expr)]; + } + default { + # catches unspecified cases + "[Stmt] + } + } + + void stmt_list( StmtList: stmt* ) + { + for Stmt: stmt in repeat( StmtList ) + stmt( Stmt ) + } + + void trans( Output: stream, Start: start ) + { + _ = new parser<out_go::out_go>() + Input: _input = _->gets() + Input->auto_trim(true) + + stmt_list( Start._repeat_stmt ) + + GO: out_go::out_go = _->finish() + + if GO { + send Output + [GO] + } + else { + send stderr + "failed 
to parse output: [_->error] + } + + } +end + +void trans( Output: stream, Start: start ) +{ + go_gen::trans( Output, Start ) +} + +include 'rlhc-main.lm' diff --git a/ragel/host-go/rlparse.lm b/ragel/host-go/rlparse.lm new file mode 100644 index 00000000..db7851c7 --- /dev/null +++ b/ragel/host-go/rlparse.lm @@ -0,0 +1,202 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + typeid<var_ref>, input->pull( match_length ) ) ) + } + else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. 
+ input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end + +end + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any +end + +reduction TopLevel + + # Pass Through. + # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? string] :ImportDefStr + # | [ident whitespace? `= whitespace? number] :ImportAssignNum + # | [ident whitespace? `= whitespace? 
string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + +export RagelError: str + +# File name. The open is expected to succeed. It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. 
+while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/host-java/.gitignore b/ragel/host-java/.gitignore new file mode 100644 index 00000000..9faa4062 --- /dev/null +++ b/ragel/host-java/.gitignore @@ -0,0 +1,12 @@ +/Makefile +/Makefile.in +/.deps +/.libs +/ragel-java +/ragel-java.exe +/rlhc.c +/rlparse.c +/rlreduce.cc + +/CMakeFiles +/cmake_install.cmake diff --git a/ragel/host-java/CMakeLists.txt b/ragel/host-java/CMakeLists.txt new file mode 100644 index 00000000..eb9fb876 --- /dev/null +++ b/ragel/host-java/CMakeLists.txt @@ -0,0 +1,34 @@ +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS rlparse.lm + COMMAND colm::colm + ARGS -I.. -c -b rlparseJava + -o "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + DEPENDS rlhc.lm + COMMAND colm::colm + ARGS -I.. 
-c -b rlhcJava + -o "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + rlhc.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_executable(ragel-java main.cc + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + +target_link_libraries(ragel-java PRIVATE libragel libfsm) + +if(${PROJECT_NAME}_MAKE_INSTALL) + install(TARGETS ragel-java + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() diff --git a/ragel/host-java/Makefile.am b/ragel/host-java/Makefile.am new file mode 100644 index 00000000..a64d7533 --- /dev/null +++ b/ragel/host-java/Makefile.am @@ -0,0 +1,27 @@ +COLM = @COLM@ +COLM_LD = @COLM_LD@ +COLM_LIBDEP = @COLM_LIBDEP@ +COLM_BINDEP = @COLM_BINDEP@ + +bin_PROGRAMS = ragel-java + +ragel_java_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_ragel_java_SOURCES = main.cc rlhc.c + +nodist_ragel_java_SOURCES = \ + rlparse.c rlreduce.cc + +ragel_java_LDADD = ../libragel.la ../libfsm.la $(COLM_LD) + +BUILT_SOURCES = rlparse.c rlreduce.cc rlhc.c + +EXTRA_DIST = rlparse.lm rlhc.lm + +rlparse.c: rlparse.lm $(COLM_BINDEP) + $(COLM) -I.. -c -b rlparseJava -o $@ -m rlreduce.cc $< + +rlreduce.cc: rlparse.c + +rlhc.c: rlhc.lm + $(COLM) -I.. 
-c -b rlhcJava -o $@ $< diff --git a/ragel/host-java/main.cc b/ragel/host-java/main.cc new file mode 100644 index 00000000..d4efd3fc --- /dev/null +++ b/ragel/host-java/main.cc @@ -0,0 +1,64 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inputdata.h" + +extern struct colm_sections rlparseJava; +extern struct colm_sections rlhcJava; + +/* + * Java + */ + +const char *defaultOutFnJava( const char *inputFileName ) +{ + return fileNameFromStem( inputFileName, ".java" ); +} + +HostType hostTypesJava[] = +{ + { "byte", 0, "byte", true, true, false, CHAR_MIN, CHAR_MAX, 0, 0, 1 }, + { "short", 0, "short", true, true, false, SHRT_MIN, SHRT_MAX, 0, 0, 2 }, + { "char", 0, "char", false, true, false, 0, 0, 0, USHRT_MAX, 2 }, + { "int", 0, "int", true, true, false, INT_MIN, INT_MAX, 0, 0, 4 }, +}; + +const HostLang hostLangJava = +{ + hostTypesJava, + 4, + 2, + false, + true, /* loopLabels */ + Translated, + BreakFeature, + &makeCodeGen, + &defaultOutFnJava, + &genLineDirectiveTrans +}; + + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangJava, &rlparseJava, &rlhcJava ); + return id.rlhcMain( argc, argv ); +} diff --git a/ragel/host-java/rlhc.lm b/ragel/host-java/rlhc.lm new file mode 100644 index 00000000..3704d7d5 --- /dev/null +++ b/ragel/host-java/rlhc.lm @@ -0,0 +1,501 @@ +include 'ril.lm' + +namespace java_out + token _IN_ /''/ + token _EX_ /''/ + + lex + token comment / + '//' any* :> '\n' | + '/*' any* :>> '*/' + / + + token id + /[a-zA-Z_][a-zA-Z_0-9]*/ + + token number / + [0-9]+ + / + + token symbol / + '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | + '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | + '=' | '>' | '?' 
| '@' | '[' | ']' | '^' | '|' | + '~' / + + literal `{ `} + + token string / + '"' ( [^"\\] | '\\' any ) * '"' | + "'" ( [^'\\] | '\\' any ) * "'" + / + + ignore + /[ \t\v\r\n]+/ + end + + def item + [comment] + | [id] + | [number] + | [symbol] + | [string] + | [`{ _IN_ item* _EX_ `} ] + + def java_out + [_IN_ _EX_ item*] +end + +namespace java_gen + + global Parser: parser<java_out::java_out> + + void tok_list( TL: host::tok* ) + { + for Tok: host::tok in repeat(TL) { + switch Tok + case [host::`${ StmtList: stmt* host::`}$] { + send Parser + "{ + " [stmt_list( StmtList )] + "} + } + case [host::`={ Expr: expr host::`}=] + send Parser "([expr( Expr )])" + case [E: escape] { + Str: str = $E + send Parser + "[Str.suffix( 1 )]" + } + default { + send Parser [Tok] + } + } + } + + void embedded_host( EH: embedded_host ) + { + switch EH + case [`host `( string `, uint `) `={ TL: host::tok* host::`}=] + { + send Parser + "([tok_list( TL )])" + } + case [`host `( string `, uint `) `${ TL: host::tok* host::`}$] + { + send Parser + "{ + " [tok_list( TL )] + "} + } + case [`host `( string `, uint `) `@{ TL: host::tok* host::`}@] + { + send Parser + [tok_list( TL )] + } + } + + void expr_factor( ExprFactor: expr_factor ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case + [O:`( TL: expr C: `)] + { + send Parser + [O expr(TL) C] + } + case + [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr( TL ) C] + } + case + ['offset' '(' ident ',' expr ')'] + { + send Parser + [expr( ExprFactor.expr )] + } + case + ['deref' '(' ident ',' expr ')'] + { + send Parser + [ ExprFactor.ident '[' expr( ExprFactor.expr ) ']'] + } + case + [T: `TRUE] + { + T.data = 'true' + send Parser [T] + } + case + [F: `FALSE] + { + F.data = 'false' + send Parser [F] + } + case + [N: `nil] + { + N.data = '0' + send Parser [N] + } + case + [Number: number] + { + number( Number ) + } + case [E1: embedded_host `-> E2: expr_factor] + { + 
embedded_host( E1 ) + expr_factor( E2 ) + } + case [`cast Open: `( Type: type Close: `) expr_factor] + { + send Parser [Open] + type( Type ) + send Parser [Close] + expr_factor( ExprFactor._expr_factor ) + } + case [I: ident `[ E: expr `] `. F: ident] { + send Parser + [^I '_' ^F '[' E ']'] + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void lvalue( ExprFactor: lvalue ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case + [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr( TL ) C] + } + case [E1: embedded_host `-> E2: lvalue] + { + embedded_host( E1 ) + lvalue( E2 ) + } + case [I: ident `[ E: expr `] `. F: ident] { + send Parser + [^I '_' ^F '[' E ']'] + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void expr_factor_op( ExprFactorOp: expr_factor_op ) + { + switch ExprFactorOp + case [B: `! expr_factor_op] + { + send Parser [B] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [T: `~ expr_factor_op] + { + send Parser [T] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [expr_factor] + { + expr_factor( ExprFactorOp.expr_factor ) + } + } + + void expr_bitwise( ExprBitwise: expr_bitwise ) + { + switch ExprBitwise + case [expr_bitwise A: `& expr_factor_op] + { + expr_bitwise( ExprBitwise._expr_bitwise ) + send Parser [A] + expr_factor_op( ExprBitwise.expr_factor_op ) + } + case [expr_factor_op] + { + expr_factor_op( ExprBitwise.expr_factor_op ) + } + } + + void expr_mult( ExprMult: expr_mult ) + { + switch ExprMult + case [expr_mult T: `* expr_bitwise] + { + expr_mult( ExprMult._expr_mult ) + send Parser [T] + expr_bitwise( ExprMult.expr_bitwise ) + } + case [expr_bitwise] + { + expr_bitwise( ExprMult.expr_bitwise ) + } + } + + void expr_add( ExprAdd: expr_add ) + { + switch ExprAdd + case [expr_add Op: add_op expr_mult] + { + expr_add( ExprAdd._expr_add ) + send Parser [Op] + expr_mult( 
ExprAdd.expr_mult ) + } + case [expr_mult] + { + expr_mult( ExprAdd.expr_mult ) + } + } + + void expr_shift( ExprShift: expr_shift ) + { + switch ExprShift + case [expr_shift Op: shift_op expr_add] + { + expr_shift( ExprShift._expr_shift ) + send Parser [Op] + expr_add( ExprShift.expr_add ) + } + case [expr_add] + { + expr_add( ExprShift.expr_add ) + } + } + + void expr_test( ExprTest: expr_test ) + { + switch ExprTest + case [expr_test Op: test_op expr_shift] + { + expr_test( ExprTest._expr_test ) + send Parser [Op] + expr_shift( ExprTest.expr_shift ) + } + case [expr_shift] + { + expr_shift( ExprTest.expr_shift ) + } + } + + void expr( Expr: expr ) + { + expr_test( Expr.expr_test ) + } + + void type( Type: type ) + { + switch Type + case "s8" + send Parser ['byte '] + case "s16" + send Parser ['short '] + case "s32" + send Parser ['int '] + case "s64" + send Parser ['long '] + case "s128" + send Parser ['long long '] + case "uint" + send Parser ['int '] + default + send Parser [Type] + } + + void number( Number: number ) + { + switch Number + case [`u `( uint `) ] + send Parser "[Number.uint]" + default + send Parser [Number] + } + + void java_num_list( NumList: num_list ) + { + for Number: number in NumList + send Parser "[number( Number )], " + } + + void stmt( Stmt: stmt ) + { + switch Stmt + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [A: static_array] { + send Parser + "private static [type(A.type)] " + "[A.ident] \[\] = { [java_num_list(A.num_list)] }; + } + case [V: static_value] { + send Parser + "private static [V.type] [V.ident] = [V.number]; + } + case [ + 'if' O: `( IfExpr: expr C: `) IfStmt: stmt + ElseIfClauseList: else_if_clause* ElseClauseOpt: else_clause? 
+ ] { + send Parser + "if ( [expr(IfExpr)] ) + " [stmt(IfStmt)] + + for ElseIfClause: else_if_clause in repeat( ElseIfClauseList ) { + match ElseIfClause + ['else if (' ElseIfExpr: expr ')' ElseIfStmt: stmt] + + send Parser + "else if ( [expr(ElseIfExpr)] ) + " [stmt(ElseIfStmt)] + } + + if ( match ElseClauseOpt ['else' ElseStmt: stmt] ) { + send Parser + "else + " [stmt(ElseStmt)] + } + } + case ['while' '(' WhileExpr: expr ')' WhileStmt: stmt] { + send Parser + "while ( [expr(WhileExpr)] ) + " [stmt(WhileStmt)] + } + case [BL: break_label? 'while' '(' WhileExpr: expr ')' '{' StmtList: stmt* '}' ] { + if match BL [bl: break_label] + send Parser "[bl.ident]: " + + send Parser + "while ( [expr(WhileExpr)] ) { + " [stmt_list(StmtList)] + "} + } + case ['switch' '(' SwitchExpr: expr ')' '{' StmtList: stmt* '}'] { + send Parser + "switch ( [expr(SwitchExpr)] ) { + " [stmt_list(StmtList)] + "} + } + case [ExprExpr: expr Semi: `;] { + send Parser + [expr(ExprExpr) Semi] + } + case [L: `{ TL: stmt* R: `}] { + send Parser + "{ + " [stmt_list(TL)] + "} + } + case [ + TypeList: opt_const Type: type + Ident: ident OptInit: opt_init Semi: `; + ] + { + send Parser + [TypeList type(Type) Ident] + + if match OptInit [E: `= expr] { + send Parser + [E expr(OptInit.expr)] + } + + send Parser + [Semi] + } + case [Export: export_stmt] + { + send Parser + "static final char [Export.ident] = [number(Export.number)]; + } + case ['fallthrough' ';'] + { + # Nothing needed here. 
+ } + case [Index: index_stmt] + { + send Parser + "int [Index.ident]" + + if match Index.opt_init [E: `= expr] { + send Parser + [E expr(Index.opt_init.expr)] + } + send Parser "; + + } + case [case_block] + { + send Parser + "case [expr( Stmt.case_block.expr )]: + "[stmt_list( Stmt.case_block._repeat_stmt )] + "break; + } + case [default_block] + { + send Parser + "default: + "[stmt_list( Stmt.default_block._repeat_stmt )] + "break; + } + case [goto_label] { } + case [G: goto_stmt] { } + case [AS: assign_stmt] + { + send Parser + "[lvalue(AS.LValue) AS.assign_op expr(AS.expr)]; + } + default { + # catches unspecified cases + send Parser [Stmt] + } + } + + void stmt_list( StmtList: stmt* ) + { + for Stmt: stmt in repeat( StmtList ) + stmt( Stmt ) + } + + void trans( Output: stream, Start: start ) + { + Parser = new parser<java_out::java_out>() + + stmt_list( Start._repeat_stmt ) + + JO: java_out::java_out = Parser->finish() + + if JO { + send Output + [JO] + } + else { + send stderr + "failed to parse output: [Parser->error] + } + } +end + +void trans( Output: stream, Start: start ) +{ + java_gen::trans( Output, Start ) +} + +include 'rlhc-main.lm' diff --git a/ragel/host-java/rlparse.lm b/ragel/host-java/rlparse.lm new file mode 100644 index 00000000..db7851c7 --- /dev/null +++ b/ragel/host-java/rlparse.lm @@ -0,0 +1,202 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + typeid<var_ref>, input->pull( match_length ) ) ) + } 
+ else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. + input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end + +end + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any +end + +reduction TopLevel + + # Pass Through. + # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? string] :ImportDefStr + # | [ident whitespace? `= whitespace? 
number] :ImportAssignNum + # | [ident whitespace? `= whitespace? string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + +export RagelError: str + +# File name. The open is expected to succeed. It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. 
+while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/host-js/.gitignore b/ragel/host-js/.gitignore new file mode 100644 index 00000000..2beac7d1 --- /dev/null +++ b/ragel/host-js/.gitignore @@ -0,0 +1,12 @@ +/Makefile +/Makefile.in +/.deps +/.libs +/ragel-js +/ragel-js.exe +/rlhc.c +/rlparse.c +/rlreduce.cc + +/CMakeFiles +/cmake_install.cmake diff --git a/ragel/host-js/CMakeLists.txt b/ragel/host-js/CMakeLists.txt new file mode 100644 index 00000000..f0795f36 --- /dev/null +++ b/ragel/host-js/CMakeLists.txt @@ -0,0 +1,34 @@ +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS rlparse.lm + COMMAND colm::colm + ARGS -I.. -c -b rlparseJs + -o "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + DEPENDS rlhc.lm + COMMAND colm::colm + ARGS -I.. 
-c -b rlhcJs + -o "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + rlhc.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_executable(ragel-js main.cc + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + +target_link_libraries(ragel-js PRIVATE libragel libfsm) + +if(${PROJECT_NAME}_MAKE_INSTALL) + install(TARGETS ragel-js + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() diff --git a/ragel/host-js/Makefile.am b/ragel/host-js/Makefile.am new file mode 100644 index 00000000..5d80b533 --- /dev/null +++ b/ragel/host-js/Makefile.am @@ -0,0 +1,27 @@ +COLM = @COLM@ +COLM_LD = @COLM_LD@ +COLM_LIBDEP = @COLM_LIBDEP@ +COLM_BINDEP = @COLM_BINDEP@ + +bin_PROGRAMS = ragel-js + +ragel_js_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_ragel_js_SOURCES = main.cc rlhc.c + +nodist_ragel_js_SOURCES = \ + rlparse.c rlreduce.cc + +ragel_js_LDADD = ../libragel.la ../libfsm.la $(COLM_LD) + +BUILT_SOURCES = rlparse.c rlreduce.cc rlhc.c + +EXTRA_DIST = rlparse.lm rlhc.lm + +rlparse.c: rlparse.lm $(COLM_BINDEP) + $(COLM) -I.. -c -b rlparseJs -o $@ -m rlreduce.cc $< + +rlreduce.cc: rlparse.c + +rlhc.c: rlhc.lm + $(COLM) -I.. 
-c -b rlhcJs -o $@ $< diff --git a/ragel/host-js/main.cc b/ragel/host-js/main.cc new file mode 100644 index 00000000..44796436 --- /dev/null +++ b/ragel/host-js/main.cc @@ -0,0 +1,66 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inputdata.h" + +extern struct colm_sections rlparseJs; +extern struct colm_sections rlhcJs; + +/* + * JavaScript + */ + +const char *defaultOutFnJs( const char *inputFileName ) +{ + return fileNameFromStem( inputFileName, ".js" ); +} + +HostType hostTypesJS[] = +{ + { "s8", 0, "int8", true, true, false, CHAR_MIN, CHAR_MAX, 0, 0, 1 }, + { "u8", 0, "uint8", false, true, false, 0, 0, 0, UCHAR_MAX, 1 }, + { "s16", 0, "int16", true, true, false, SHRT_MIN, SHRT_MAX, 0, 0, 2 }, + { "u16", 0, "uint16", false, true, false, 0, 0, 0, USHRT_MAX, 2 }, + { "i32", 0, "int32", true, true, false, INT_MIN, INT_MAX, 0, 0, 4 }, + { "u32", 0, "uint32", false, true, false, 0, 0, 0, UINT_MAX, 4 }, + { "number", 0, "number", true, true, false, LONG_MIN, LONG_MAX, 0, 0, 8 }, +}; + +const HostLang hostLangJS = +{ + hostTypesJS, + 7, + 1, + false, + true, /* loopLabels */ + Translated, + BreakFeature, + &makeCodeGen, + &defaultOutFnJs, + &genLineDirectiveTrans +}; + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangJS, &rlparseJs, &rlhcJs ); + return id.rlhcMain( argc, argv ); +} diff --git a/ragel/host-js/rlhc.lm b/ragel/host-js/rlhc.lm new file mode 100644 index 00000000..61e0fbd6 --- /dev/null +++ b/ragel/host-js/rlhc.lm @@ -0,0 +1,501 @@ +include 'ril.lm' + +namespace js_out + token _IN_ /''/ + token _EX_ /''/ + + lex + token comment / + '//' any* :> '\n' | + '/*' any* :>> '*/' + / + + token id + /[a-zA-Z_][a-zA-Z_0-9]*/ + + token number / + [0-9]+ + / + + token symbol / + '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | + '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | + '=' | '>' | '?' 
| '@' | '[' | ']' | '^' | '|' | + '~' / + + literal `{ `} + + token string / + '"' ( [^"\\] | '\\' any ) * '"' | + "'" ( [^'\\] | '\\' any ) * "'" + / + + ignore + /[ \t\v\r\n]+/ + end + + def item + [comment] + | [id] + | [number] + | [symbol] + | [string] + | [`{ _IN_ item* _EX_ `} ] + + def js_out + [_IN_ _EX_ item*] +end + +namespace js_gen + + global Parser: parser<js_out::js_out> + + void tok_list( TL: host::tok* ) + { + for Tok: host::tok in repeat(TL) { + switch Tok + case [host::`${ StmtList: stmt* host::`}$] { + send Parser + "{ + " [stmt_list( StmtList )] + "} + } + case [host::`={ Expr: expr host::`}=] { + send Parser + "([expr( Expr )])" + } + case [E: escape] { + Str: str = $E + send Parser + "[Str.suffix( 1 )]" + } + default { + send Parser + [Tok] + } + } + } + + void embedded_host( EmbeddedHost: embedded_host ) + { + switch EmbeddedHost + case [`host `( string `, uint `) `={ TL: host::tok* host::`}=] + { + send Parser + "([tok_list( TL )])" + } + case [`host `( string `, uint `) `${ TL: host::tok* host::`}$] + { + send Parser + "{ + " [tok_list( TL )] + "} + } + case [`host `( string `, uint `) `@{ TL: host::tok* host::`}@] + { + send Parser + [tok_list( TL )] + } + } + + void expr_factor( ExprFactor: expr_factor ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [O:`( TL: expr C: `)] + { + send Parser + [O expr(TL) C] + } + case [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr( TL ) C] + } + case ['offset' '(' ident ',' expr ')'] + { + send Parser + [expr( ExprFactor.expr )] + } + case ['deref' '(' ident ',' expr ')'] + { + send Parser [ExprFactor.ident] + if $ExprFactor.ident == 'data' + send Parser ['.charCodeAt(' expr( ExprFactor.expr ) ')'] + else + send Parser ['[' expr( ExprFactor.expr ) ']'] + } + case [T: `TRUE] + { + T.data = 'true' + send Parser [T] + } + case [F: `FALSE] + { + F.data = 'false' + send Parser [F] + } + case [N: `nil] + { + N.data = '-1' + send 
Parser [N] + } + case [Number: number] + { + number( Number ) + } + case [E1: embedded_host `-> E2: expr_factor] + { + embedded_host( E1 ) + expr_factor( E2 ) + } + case [`cast Open: `( type Close: `) expr_factor] + { + expr_factor( ExprFactor._expr_factor ) + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void lvalue( ExprFactor: lvalue ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr( TL ) C] + } + case [I: ident `[ E: expr `] `. F: ident] + { + send Parser + "[I]\[[ expr( E )]\].[F] + } + case [E1: embedded_host `-> E2: lvalue] + { + # The accessor operator is contained wihtin the lhs. + embedded_host( E1 ) + lvalue( E2 ) + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void expr_factor_op( ExprFactorOp: expr_factor_op ) + { + switch ExprFactorOp + case [B: `! expr_factor_op] + { + send Parser [B] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [T: `~ expr_factor_op] + { + send Parser [T] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [expr_factor] + { + expr_factor( ExprFactorOp.expr_factor ) + } + } + + void expr_bitwise( ExprBitwise: expr_bitwise ) + { + switch ExprBitwise + case [expr_bitwise A: `& expr_factor_op] + { + expr_bitwise( ExprBitwise._expr_bitwise ) + send Parser [A] + expr_factor_op( ExprBitwise.expr_factor_op ) + } + case [expr_factor_op] + { + expr_factor_op( ExprBitwise.expr_factor_op ) + } + } + + void expr_mult( ExprMult: expr_mult ) + { + switch ExprMult + case [expr_mult T: `* expr_bitwise] + { + expr_mult( ExprMult._expr_mult ) + send Parser [T] + expr_bitwise( ExprMult.expr_bitwise ) + } + case [expr_bitwise] + { + expr_bitwise( ExprMult.expr_bitwise ) + } + } + + void expr_add( ExprAdd: expr_add ) + { + switch ExprAdd + case [expr_add Op: add_op expr_mult] + { + expr_add( ExprAdd._expr_add ) + send Parser 
[Op] + expr_mult( ExprAdd.expr_mult ) + } + case [expr_mult] + { + expr_mult( ExprAdd.expr_mult ) + } + } + + void expr_shift( ExprShift: expr_shift ) + { + switch ExprShift + case [expr_shift Op: shift_op expr_add] + { + expr_shift( ExprShift._expr_shift ) + send Parser [Op] + expr_add( ExprShift.expr_add ) + } + case [expr_add] + { + expr_add( ExprShift.expr_add ) + } + } + + void expr_test_op( Op: test_op ) + { + switch Op + case [ `== ] + send Parser '===' + case [ `!= ] + send Parser '!==' + default + send Parser [Op] + } + + void expr_test( ExprTest: expr_test ) + { + switch ExprTest + case [expr_test test_op expr_shift] + { + expr_test( ExprTest._expr_test ) + expr_test_op( ExprTest.test_op ) + expr_shift( ExprTest.expr_shift ) + } + case [expr_shift] + { + expr_shift( ExprTest.expr_shift ) + } + } + + void expr( Expr: expr ) + { + expr_test( Expr.expr_test ) + } + + void number( Number: number ) + { + switch Number + case [`u `( uint `) ] + send Parser [Number.uint] + default + send Parser [Number] + } + + void type( Type: type ) + { + switch Type + case 'u8' + send Parser 'Uint8' + case 'u16' + send Parser 'Uint16' + case 'u32' + send Parser 'Uint32' + case 's8' + send Parser 'Int8' + case 's16' + send Parser 'Int16' + case 's32' + send Parser 'Int32' + default + send Parser 'Float64' + } + + void num_list( NumList: num_list ) + { + number( NumList.number ) + for CommaNum: comma_num in NumList { + send Parser [', '] + number( CommaNum.number ) + } + } + + void stmt( Stmt: stmt ) + { + switch Stmt + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [A: static_array] { + send Parser + "var [A.ident] = new [type(A.type)]Array(\[ [num_list(A.num_list)]\]); + } + case [V: static_value] { + send Parser + "var [V.ident] = [V.number]; + } + case [ + 'if' O: `( IfExpr: expr C: `) IfStmt: stmt + ElseIfClauseList: else_if_clause* ElseClauseOpt: else_clause? 
+ ] { + send Parser + "if ( [expr(IfExpr)] ) + " [stmt(IfStmt)] + + for ElseIfClause: else_if_clause in repeat( ElseIfClauseList ) { + match ElseIfClause + ['else if (' ElseIfExpr: expr ')' ElseIfStmt: stmt] + + send Parser + "else if ( [expr(ElseIfExpr)] ) + " [stmt(ElseIfStmt)] + } + + if ( match ElseClauseOpt ['else' ElseStmt: stmt] ) { + send Parser + "else + " [stmt(ElseStmt)] + } + } + case ['while' '(' WhileExpr: expr ')' WhileStmt: stmt] { + send Parser + "while ( [expr(WhileExpr)] ) + " [stmt(WhileStmt)] + } + case ['switch' '(' SwitchExpr: expr ')' '{' StmtList: stmt* '}'] { + send Parser + "switch ( [expr(SwitchExpr)] ) { + " [stmt_list(StmtList)] + "} + } + case [ExprExpr: expr Semi: `;] { + send Parser + [expr(ExprExpr) Semi] + } + case [L: `{ TL: stmt* R: `}] { + send Parser + "{ + " [stmt_list(TL)] + "} + } + case [ + TypeList: opt_const Type: type + Ident: ident OptInit: opt_init Semi: `; + ] + { + send Parser + "var [Ident] + + if match OptInit [E: `= expr] { + send Parser + [E expr(OptInit.expr)] + } + + send Parser + [Semi] + } + case [Export: export_stmt] + { + send Parser + "var [Export.ident] = [number(Export.number)]; + } + case ['fallthrough' ';'] + { + # Nothing needed here. 
+ } + case [Index: index_stmt] + { + send Parser + "var [Index.ident]" + + if match Index.opt_init [E: `= expr] { + send Parser + [E expr(Index.opt_init.expr)] + } + + send Parser + "; + } + case [case_block] + { + send Parser + "case [expr( Stmt.case_block.expr )]: + "[stmt_list( Stmt.case_block._repeat_stmt )] + "break; + } + case [default_block] + { + send Parser + "default: + "[stmt_list( Stmt.default_block._repeat_stmt )] + "break; + } + case [goto_label] { } + case [G: goto_stmt] { } + case [AS: assign_stmt] + { + send Parser + "[lvalue(AS.LValue) AS.assign_op expr(AS.expr)]; + } + default { + # catches unspecified cases + send Parser [Stmt] + } + } + + void stmt_list( StmtList: stmt* ) + { + for Stmt: stmt in repeat( StmtList ) + stmt( Stmt ) + } + + void trans( Output: stream, Start: start ) + { + Parser = new parser<js_out::js_out>() + + send Parser + "'use strict'; + + stmt_list( Start._repeat_stmt ) + + CO: js_out::js_out = Parser->finish() + + if CO { + send Output + [CO] + } + else { + send stderr + "failed to parse output: [Parser->error] + } + } +end + +void trans( Output: stream, Start: start ) +{ + js_gen::trans( Output, Start ) +} + +include 'rlhc-main.lm' diff --git a/ragel/host-js/rlparse.lm b/ragel/host-js/rlparse.lm new file mode 100644 index 00000000..db7851c7 --- /dev/null +++ b/ragel/host-js/rlparse.lm @@ -0,0 +1,202 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + typeid<var_ref>, input->pull( 
match_length ) ) ) + } + else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. + input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end + +end + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any +end + +reduction TopLevel + + # Pass Through. + # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? string] :ImportDefStr + # | [ident whitespace? `= whitespace? 
number] :ImportAssignNum + # | [ident whitespace? `= whitespace? string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + +export RagelError: str + +# File name. The open is expected to succeed. It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. 
+while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/host-julia/.gitignore b/ragel/host-julia/.gitignore new file mode 100644 index 00000000..86d9366b --- /dev/null +++ b/ragel/host-julia/.gitignore @@ -0,0 +1,12 @@ +/Makefile +/Makefile.in +/.deps +/.libs +/ragel-julia +/ragel-julia.exe +/rlhc.c +/rlparse.c +/rlreduce.cc + +/CMakeFiles +/cmake_install.cmake diff --git a/ragel/host-julia/CMakeLists.txt b/ragel/host-julia/CMakeLists.txt new file mode 100644 index 00000000..89b466e5 --- /dev/null +++ b/ragel/host-julia/CMakeLists.txt @@ -0,0 +1,34 @@ +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS rlparse.lm + COMMAND colm::colm + ARGS -I.. -c -b rlparseJulia + -o "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + DEPENDS rlhc.lm + COMMAND colm::colm + ARGS -I.. 
-c -b rlhcJulia + -o "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + rlhc.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_executable(ragel-julia main.cc + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + +target_link_libraries(ragel-julia PRIVATE libragel libfsm) + +if(${PROJECT_NAME}_MAKE_INSTALL) + install(TARGETS ragel-julia + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() diff --git a/ragel/host-julia/Makefile.am b/ragel/host-julia/Makefile.am new file mode 100644 index 00000000..c6c9f3ae --- /dev/null +++ b/ragel/host-julia/Makefile.am @@ -0,0 +1,27 @@ +COLM = @COLM@ +COLM_LD = @COLM_LD@ +COLM_LIBDEP = @COLM_LIBDEP@ +COLM_BINDEP = @COLM_BINDEP@ + +bin_PROGRAMS = ragel-julia + +ragel_julia_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_ragel_julia_SOURCES = main.cc rlhc.c + +nodist_ragel_julia_SOURCES = \ + rlparse.c rlreduce.cc + +ragel_julia_LDADD = ../libragel.la ../libfsm.la $(COLM_LD) + +BUILT_SOURCES = rlparse.c rlreduce.cc rlhc.c + +EXTRA_DIST = rlparse.lm rlhc.lm + +rlparse.c: rlparse.lm $(COLM_BINDEP) + $(COLM) -I.. -c -b rlparseJulia -o $@ -m rlreduce.cc $< + +rlreduce.cc: rlparse.c + +rlhc.c: rlhc.lm + $(COLM) -I.. 
-c -b rlhcJulia -o $@ $< diff --git a/ragel/host-julia/main.cc b/ragel/host-julia/main.cc new file mode 100644 index 00000000..f06d7493 --- /dev/null +++ b/ragel/host-julia/main.cc @@ -0,0 +1,60 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inputdata.h" + +extern struct colm_sections rlparseJulia; +extern struct colm_sections rlhcJulia; + +/* + * Julia + */ +const char *defaultOutFnJulia( const char *inputFileName ) +{ + return fileNameFromStem( inputFileName, ".jl" ); +} + +HostType hostTypesJulia[] = +{ + { "u8", 0, "byte", true, true, false, 0, UCHAR_MAX, 0, 0, 4 }, +}; + +const HostLang hostLangJulia = +{ + hostTypesJulia, + 1, + 0, + false, + false, /* loopLabels */ + Translated, + GotoFeature, + &makeCodeGen, + &defaultOutFnJulia, + &genLineDirectiveTrans +}; + + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangJulia, &rlparseJulia, &rlhcJulia ); + return id.rlhcMain( argc, argv ); +} diff --git a/ragel/host-julia/rlhc.lm b/ragel/host-julia/rlhc.lm new file mode 100644 index 00000000..085eb793 --- /dev/null +++ b/ragel/host-julia/rlhc.lm @@ -0,0 +1,558 @@ +include 'ril.lm' + +namespace julia_out + token _IN_ /''/ + token _EX_ /''/ + + lex + token comment / + '//' any* :> '\n' | + '/*' any* :>> '*/' + / + + literal `function `end `while `if `else `elseif + + token id + /[a-zA-Z_][a-zA-Z_0-9]*/ + + token number / + [0-9]+ + / + + token symbol / + '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | + '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | + '=' | '>' | '?' 
| '@' | '[' | ']' | '^' | '|' | + '~' / + + literal `{ `} + + token string / + '"' ( [^"\\] | '\\' any ) * '"' | + "'" ( [^'\\] | '\\' any ) * "'" + / + + ignore + /[ \t\v\r\n]+/ + end + + def kw + [`function _IN_] + | [`while _IN_] + | [`if _IN_] + | [_EX_ `elseif _IN_] + | [_EX_ `else _IN_] + | [_EX_ `end] + + def item + [comment] + | [kw] + | [id] + | [number] + | [symbol] + | [string] + | [`{ _IN_ item* _EX_ `} ] + + def julia_out + [_IN_ _EX_ item*] +end + +namespace julia_gen + + global Parser: parser<julia_out::julia_out> + + void tok_list( TL: host::tok* ) + { + for Tok: host::tok in repeat(TL) { + switch Tok + case [host::`${ StmtList: stmt* host::`}$] { + send Parser + "begin + " [stmt_list( StmtList )] + "end + } + case [host::`={ Expr: expr host::`}=] { + send Parser + "([expr( Expr )])" + } + case [E: escape] { + Str: str = $E + send Parser + "[Str.suffix( 1 )]" + } + default { + send Parser + [Tok] + } + } + } + + void embedded_host( EmbeddedHost: embedded_host ) + { + switch EmbeddedHost + case [`host `( string `, uint `) `={ TL: host::tok* host::`}=] + { + send Parser + "([tok_list( TL )])" + } + case [`host `( string `, uint `) `${ TL: host::tok* host::`}$] + { + send Parser + [tok_list( TL )] + } + case [`host `( string `, uint `) `@{ TL: host::tok* host::`}@] + { + send Parser + [tok_list( TL )] + } + } + + void expr_factor( ExprFactor: expr_factor ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [`( E: expr `)] + { + send Parser + "([expr(E)])" + } + case [I: ident `[ E: expr `]] + { + send Parser + "[I]\[1+([expr( E )])\]" + } + case [`offset `( ident `, expr `)] + { + send Parser + [expr( ExprFactor.expr )] + } + case [`deref `( I: ident `, E: expr `)] + { + send Parser + "[I]\[1+([ expr( E ) ])\]" + } + case [`TRUE] + { + send Parser "true" + } + case [`FALSE] + { + send Parser "false" + } + case [N: `nil] + { + send Parser "0" + } + case [Number: number] + { + number( Number ) + } + 
case [E1: embedded_host `-> E2: expr_factor] + { + # The accessor operator is contained wihtin the lhs. + embedded_host( E1 ) + expr_factor( E2 ) + } + case [`cast `( T: type `) F: expr_factor] + { + send Parser + "convert([type( T )], [expr_factor( F )] )" + } + case [I: ident `[ E: expr `] `. F: ident] { + send Parser + [^I '_' ^F '[1+(' expr(E) ')]'] + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void lvalue( ExprFactor: lvalue ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [I: ident `[ E: expr `]] + { + send Parser + "[I]\[1+([expr( E )])\]" + } + case [E1: embedded_host `-> E2: lvalue] + { + # The accessor operator is contained wihtin the lhs. + embedded_host( E1 ) + lvalue( E2 ) + } + case [I: ident `[ E: expr `] `. F: ident] { + send Parser + [^I '_' ^F '[1+(' expr(E) ')]'] + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void expr_factor_op( ExprFactorOp: expr_factor_op ) + { + switch ExprFactorOp + case [B: `! 
expr_factor_op] + { + send Parser [B] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [T: `~ expr_factor_op] + { + send Parser [T] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [expr_factor] + { + expr_factor( ExprFactorOp.expr_factor ) + } + } + + void expr_bitwise( ExprBitwise: expr_bitwise ) + { + switch ExprBitwise + case [expr_bitwise A: `& expr_factor_op] + { + expr_bitwise( ExprBitwise._expr_bitwise ) + send Parser [A] + expr_factor_op( ExprBitwise.expr_factor_op ) + } + case [expr_factor_op] + { + expr_factor_op( ExprBitwise.expr_factor_op ) + } + } + + void expr_mult( ExprMult: expr_mult ) + { + switch ExprMult + case [expr_mult T: `* expr_bitwise] + { + expr_mult( ExprMult._expr_mult ) + send Parser [T] + expr_bitwise( ExprMult.expr_bitwise ) + } + case [expr_bitwise] + { + expr_bitwise( ExprMult.expr_bitwise ) + } + } + + void expr_add( ExprAdd: expr_add ) + { + switch ExprAdd + case [expr_add Op: add_op expr_mult] + { + expr_add( ExprAdd._expr_add ) + send Parser [Op] + expr_mult( ExprAdd.expr_mult ) + } + case [expr_mult] + { + expr_mult( ExprAdd.expr_mult ) + } + } + + void expr_shift( ExprShift: expr_shift ) + { + switch ExprShift + case [expr_shift Op: shift_op expr_add] + { + expr_shift( ExprShift._expr_shift ) + send Parser [Op] + expr_add( ExprShift.expr_add ) + } + case [expr_add] + { + expr_add( ExprShift.expr_add ) + } + } + + void expr_test( ExprTest: expr_test ) + { + switch ExprTest + case [expr_test Op: test_op expr_shift] + { + expr_test( ExprTest._expr_test ) + send Parser [Op] + expr_shift( ExprTest.expr_shift ) + } + case [expr_shift] + { + expr_shift( ExprTest.expr_shift ) + } + } + + void expr( Expr: expr ) + { + expr_test( Expr.expr_test ) + } + + void type( Type: type ) + { + switch Type + case "s8" + send Parser ['Int8'] + case "u8" + send Parser ['UInt8'] + case "s16" + send Parser ['Int16'] + case "s32" + send Parser ['Int32'] + case "s64" + send Parser ['Int64'] + case "s128" + send Parser ['Int128'] + 
case "uint" + send Parser ['UInt'] + case "int" + send Parser ['Int'] + default + send Parser [Type] + } + + void number( Number: number ) + { + switch Number + case [`u `( uint `) ] + send Parser "[Number.uint]u" + default + send Parser [Number] + } + + void num_list( NumList: num_list ) + { + for Number: number in NumList + send Parser "[number( Number )], " + } + + void stmt( Stmt: stmt ) + { + switch Stmt + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [A: static_array] { + send Parser + "const [A.ident] = [type(A.type)]\[[num_list(A.num_list)]\] + } + case [V: static_value] { + send Parser + "const [V.ident] = [V.number] + } + # case [declaration] + case [ + TypeList: opt_const Type: type + Ident: ident OptInit: opt_init Semi: `; + ] + { + send Parser + [Ident] + + if match OptInit [E: `= expr] { + send Parser + [E expr(OptInit.expr)] + } + else { + send Parser + "= 0 + } + + send Parser + [Semi] + } + case [ + `if `( IfExpr: expr `) IfStmt: stmt + ElseIfClauseList: else_if_clause* ElseClauseOpt: else_clause? 
+ ] { + send Parser + "if [expr(IfExpr)] + " [stmt(IfStmt)] + + for ElseIfClause: else_if_clause in repeat( ElseIfClauseList ) { + match ElseIfClause + [`else `if `( ElseIfExpr: expr `) ElseIfStmt: stmt] + + send Parser + "elseif [expr(ElseIfExpr)] + " [stmt(ElseIfStmt)] + } + + if ( match ElseClauseOpt ['else' ElseStmt: stmt] ) { + send Parser + "else + " [stmt(ElseStmt)] + } + + send Parser + "end + } + case [`while `( WhileExpr: expr `) WhileStmt: stmt] { + send Parser + "while [expr(WhileExpr)] + " [stmt(WhileStmt)] + "end + } + case [`switch `( SwitchExpr: expr `) `{ StmtList: stmt* `}] { + + require StmtList + [`case E1: expr `{ Inner: stmt* `} Rest: stmt*] + + send Parser + "if [expr(SwitchExpr)] == [expr(E1)] + " [stmt_list(Inner)] + + for S: stmt in repeat(Rest) { + switch S + case [`case E1: expr `{ Inner: stmt* `}] + { + send Parser + "elseif [expr(SwitchExpr)] == [expr(E1)] + " [stmt_list(Inner)] + } + case + [`default `{ Inner: stmt* `}] + { + send Parser + "else + " [stmt_list(Inner)] + } + } + + send Parser + "end + } + case [ExprExpr: expr Semi: `;] { + send Parser + [expr(ExprExpr)] + } + case [L: `{ TL: stmt* R: `}] { + send Parser + [stmt_list(TL)] + } + case [ + TypeList: opt_const Type: type + Ident: ident OptInit: opt_init Semi: `; + ] + { + send Parser + [TypeList type(Type) Ident] + + if match OptInit [E: `= expr] { + send Parser + [E expr(OptInit.expr)] + } + + send Parser "; + } + case [Export: export_stmt] + { + send Parser + "#define [Export.ident] [number(Export.number)] + } + case ['fallthrough' ';'] + { + # Nothing needed here. 
+ } + case [Index: index_stmt] + { + send Parser + "[Index.ident]" + + if match Index.opt_init [E: `= expr] { + send Parser + [E expr(Index.opt_init.expr)] + } + else { + send Parser " = 0 " + + } + + send Parser "; + } + case [case_block] + { + send Parser + "@case [expr( Stmt.case_block.expr )] begin + "[stmt_list( Stmt.case_block._repeat_stmt )] + "end + } + case [default_block] + { + send Parser + "default: + "[stmt_list( Stmt.default_block._repeat_stmt )] + "break; + } + case [case_label] + { + send Parser + "@case [expr( Stmt.case_label.expr )] + } + case [L: goto_label] + { + send Parser + "@label [L.ident] + } + case [G: goto_stmt] + { + send Parser + "@goto [G.ident] + } + case [AS: assign_stmt] + { + send Parser + "[lvalue(AS.LValue) AS.assign_op expr(AS.expr)] + } + default { + # catches unspecified cases + send Parser + "[Stmt] + } + } + + void stmt_list( StmtList: stmt* ) + { + for Stmt: stmt in repeat( StmtList ) + stmt( Stmt ) + } + + void trans( Output: stream, Start: start ) + { + Parser = new parser<julia_out::julia_out>() + + stmt_list( Start._repeat_stmt ) + + CO: julia_out::julia_out = Parser->finish() + + if CO { + send Output + [CO] + } + else { + send stderr + "failed to parse output: [Parser->error] + } + } +end + +void trans( Output: stream, Start: start ) +{ + julia_gen::trans( Output, Start ) +} + +include 'rlhc-main.lm' diff --git a/ragel/host-julia/rlparse.lm b/ragel/host-julia/rlparse.lm new file mode 100644 index 00000000..db7851c7 --- /dev/null +++ b/ragel/host-julia/rlparse.lm @@ -0,0 +1,202 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL 
)+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + typeid<var_ref>, input->pull( match_length ) ) ) + } + else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. + input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end + +end + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any +end + +reduction TopLevel + + # Pass Through. 
+ # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? string] :ImportDefStr + # | [ident whitespace? `= whitespace? number] :ImportAssignNum + # | [ident whitespace? `= whitespace? string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + +export RagelError: str + +# File name. The open is expected to succeed. It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. 
+while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/host-ocaml/.gitignore b/ragel/host-ocaml/.gitignore new file mode 100644 index 00000000..ef32eebe --- /dev/null +++ b/ragel/host-ocaml/.gitignore @@ -0,0 +1,12 @@ +/Makefile +/Makefile.in +/.deps +/.libs +/ragel-ocaml +/ragel-ocaml.exe +/rlhc.c +/rlparse.c +/rlreduce.cc + +/CMakeFiles +/cmake_install.cmake diff --git a/ragel/host-ocaml/CMakeLists.txt b/ragel/host-ocaml/CMakeLists.txt new file mode 100644 index 00000000..eff80ce9 --- /dev/null +++ b/ragel/host-ocaml/CMakeLists.txt @@ -0,0 +1,34 @@ +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS rlparse.lm + COMMAND colm::colm + ARGS -I.. -c -b rlparseOCaml + -o "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + DEPENDS rlhc.lm + COMMAND colm::colm + ARGS -I.. 
-c -b rlhcOCaml + -o "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + rlhc.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_executable(ragel-ocaml main.cc + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + +target_link_libraries(ragel-ocaml PRIVATE libragel libfsm) + +if(${PROJECT_NAME}_MAKE_INSTALL) + install(TARGETS ragel-ocaml + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() diff --git a/ragel/host-ocaml/Makefile.am b/ragel/host-ocaml/Makefile.am new file mode 100644 index 00000000..be017453 --- /dev/null +++ b/ragel/host-ocaml/Makefile.am @@ -0,0 +1,32 @@ +COLM = @COLM@ +COLM_LD = @COLM_LD@ +COLM_LIBDEP = @COLM_LIBDEP@ +COLM_BINDEP = @COLM_BINDEP@ + +bin_PROGRAMS = ragel-ocaml + +ragel_ocaml_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_ragel_ocaml_SOURCES = main.cc rlhc.c + +nodist_ragel_ocaml_SOURCES = \ + rlparse.c rlreduce.cc + +ragel_ocaml_LDADD = ../libragel.la ../libfsm.la $(COLM_LD) + +BUILT_SOURCES = rlparse.c rlreduce.cc rlhc.c + +EXTRA_DIST = rlparse.lm rlhc.lm + +rlparse.c: rlparse.lm $(COLM_BINDEP) + $(COLM) -I.. -c -b rlparseOCaml -o $@ -m rlreduce.cc $< + +rlreduce.cc: rlparse.c + +rlhc$(EXEEXT): rlhc.lm + $(COLM) -I.. -o $@ $< + +rlhc.c: rlhc.lm + $(COLM) -I.. 
-c -b rlhcOCaml -o $@ $< + + diff --git a/ragel/host-ocaml/main.cc b/ragel/host-ocaml/main.cc new file mode 100644 index 00000000..36f3d030 --- /dev/null +++ b/ragel/host-ocaml/main.cc @@ -0,0 +1,59 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inputdata.h" + +extern struct colm_sections rlparseOCaml; +extern struct colm_sections rlhcOCaml; + +/* + * OCaml + */ +const char *defaultOutFnOCaml( const char *inputFileName ) +{ + return fileNameFromStem( inputFileName, ".ml" ); +} + +HostType hostTypesOCaml[] = +{ + { "int", 0, "int", true, true, false, S31BIT_MIN, S31BIT_MAX, 0, 0, 4 }, +}; + +const HostLang hostLangOCaml = +{ + hostTypesOCaml, + 1, + 0, + false, + false, /* loopLabels */ + Translated, + VarFeature, + &makeCodeGen, + &defaultOutFnOCaml, + &genLineDirectiveTrans +}; + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangOCaml, &rlparseOCaml, &rlhcOCaml ); + return id.rlhcMain( argc, argv ); +} diff --git a/ragel/host-ocaml/rlhc.lm b/ragel/host-ocaml/rlhc.lm new file mode 100644 index 00000000..362116b3 --- /dev/null +++ b/ragel/host-ocaml/rlhc.lm @@ -0,0 +1,606 @@ +include 'ril.lm' + +namespace ocaml_out + token _IN_ /''/ + token _EX_ /''/ + + lex + token comment / + '(*' any* :>> '*)' + / + + literal `begin `end `{ `} + + token id + /[a-zA-Z_][a-zA-Z_0-9]* "'"? / + + token number / + [0-9]+ + / + + token symbol / + '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | + '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | + '=' | '>' | '?' 
| '@' | '[' | ']' | '^' | '|' | + '~' | '`' / + + token string / + '"' ( [^"\\\n] | '\\' any ) * '"' | + "'" ( [^'\\\n] | '\\' any ) * "'" + / + + ignore + /[ \t\v\r\n]+/ + end + + def item + [comment] + | [id] + | [number] + | [symbol] + | [string] + | [`begin _IN_ item* _EX_ `end ] + | [`{ _IN_ item* _EX_ `} ] + + def ocaml_out + [_IN_ _EX_ item*] +end + +namespace ml_gen + + global StaticVarMap: map<str, str> = new map<str, str>() + global Parser: parser<ocaml_out::ocaml_out> + + void tok_list( TL: host::tok* ) + { + for Tok: host::tok in repeat(TL) { + switch Tok + case [host::`${ StmtList: stmt* host::`}$] + send Parser + "begin + "[stmt_list( StmtList )] + "end; + case [host::`={ Expr: expr host::`}=] + send Parser "([expr( Expr )])" + case [E: escape] { + Str: str = $E + send Parser + "[Str.suffix( 1 )]" + } + default { + send Parser [Tok] + } + } + } + + void embedded_host( EH: embedded_host ) + { + switch EH + case [`host `( string `, uint `) `={ TL: host::tok* host::`}=] + { + send Parser + "([tok_list( TL )])" + } + case [`host `( string `, uint `) `${ TL: host::tok* host::`}$] + { + send Parser + "begin + "[tok_list( TL )] + "end; + } + case [`host `( string `, uint `) `@{ TL: host::tok* host::`}@] + { + send Parser + [tok_list( TL )] + } + } + + void expr_factor( ExprFactor: expr_factor ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [`( TL: expr `)] + { + send Parser + "( [ expr(TL) ] )" + } + case [I: ident `[ TL: expr `]] + { + if ( StaticVarMap->find( $I ) || $I == 'stack' ) { + send Parser + "[ ExprFactor.ident ].([ expr( TL ) ])" + } + else { + send Parser + "[ ExprFactor.ident ].\[[ expr( TL ) ]\]" + } + } + case [I: ident `[ E: expr `] `. 
F: ident] { + send Parser + [^I '_' ^F '.(' expr(E) ')'] + } + case ['offset' '(' ident ',' expr ')'] + { + send Parser + [expr( ExprFactor.expr )] + } + case ['deref' '(' I: ident ',' Expr: expr ')'] + { + if ( $I == 'data' ) + send Parser 'Char.code ' + + if ( StaticVarMap->find( $I ) ) { + send Parser + "[I].( [ expr( Expr ) ] )" + } + else { + send Parser + "[I].\[[ expr( Expr ) ]\]" + } + } + case [T: `TRUE] + { + T.data = 'true' + send Parser [T] + } + case [F: `FALSE] + { + F.data = 'false' + send Parser [F] + } + case [N: `nil] + { + N.data = '0' + send Parser [N] + } + case [Number: number] + { + number( Number ) + } + case [I: ident] { + if ( StaticVarMap->find( $I ) ) { + send Parser + [^I] + } + else { + send Parser + "[^I].contents" + } + } + case [E1: embedded_host `-> E2: expr_factor] + { + # The accessor operator is contained wihtin the lhs. + embedded_host( E1 ) + expr_factor( E2 ) + } + case [`cast `( type `) expr_factor] + { + send Parser + [expr_factor( ExprFactor._expr_factor )] + } + case [I: ident `[ E: expr `] `. F: ident] { + send Parser + [^I '_' ^F '[' E ']'] + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void lvalue( ExprFactor: lvalue ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr( TL ) C] + } + case [E1: embedded_host `-> E2: lvalue] + { + # The accessor operator is contained wihtin the lhs. + embedded_host( E1 ) + lvalue( E2 ) + } + case [I: ident `[ E: expr `] `. F: ident] { + send Parser + [^I '_' ^F '.(' E ')'] + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void expr_factor_op( ExprFactorOp: expr_factor_op ) + { + switch ExprFactorOp + case [B: `! 
expr_factor_op] + { + send Parser [B] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [T: `~ expr_factor_op] + { + send Parser " lnot " + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [expr_factor] + { + expr_factor( ExprFactorOp.expr_factor ) + } + } + + void expr_bitwise( ExprBitwise: expr_bitwise ) + { + switch ExprBitwise + case [L: expr_bitwise `& R: expr_factor_op] + { + send Parser + "[expr_bitwise( L )] land [expr_factor_op( R )]" + } + case [expr_factor_op] + { + expr_factor_op( ExprBitwise.expr_factor_op ) + } + } + + void expr_mult( ExprMult: expr_mult ) + { + switch ExprMult + case [expr_mult T: `* expr_bitwise] + { + expr_mult( ExprMult._expr_mult ) + send Parser [T] + expr_bitwise( ExprMult.expr_bitwise ) + } + case [expr_bitwise] + { + expr_bitwise( ExprMult.expr_bitwise ) + } + } + + void expr_add( ExprAdd: expr_add ) + { + switch ExprAdd + case [expr_add Op: add_op expr_mult] + { + expr_add( ExprAdd._expr_add ) + send Parser [Op] + expr_mult( ExprAdd.expr_mult ) + } + case [expr_mult] + { + expr_mult( ExprAdd.expr_mult ) + } + } + + void expr_shift( ExprShift: expr_shift ) + { + switch ExprShift + case [expr_shift Op: shift_op expr_add] + { + expr_shift( ExprShift._expr_shift ) + switch Op + case [`<<] + send Parser " lsl " + default + send Parser " asr " + expr_add( ExprShift.expr_add ) + } + case [expr_add] + { + expr_add( ExprShift.expr_add ) + } + } + + void expr_test( ExprTest: expr_test ) + { + switch ExprTest + case [expr_test Op: test_op expr_shift] + { + expr_test( ExprTest._expr_test ) + switch Op + case [`==] + send Parser "= " + default + send Parser [Op] + expr_shift( ExprTest.expr_shift ) + } + case [expr_shift] + { + expr_shift( ExprTest.expr_shift ) + } + } + + void expr( Expr: expr ) + { + expr_test( Expr.expr_test ) + } + + void type( Type: type ) + { + switch Type + case "s8" + send Parser ['char '] + case "s16" + send Parser ['short '] + case "s32" + send Parser ['int '] + case "s64" + send Parser ['long 
'] + case "s128" + send Parser ['long long '] + case "uint" + send Parser ['int '] + default + send Parser [Type] + } + + void number( Number: number ) + { + switch Number + case [`u `( uint `) ] + send Parser "[Number.uint]u" + default + send Parser [Number] + } + + void num_list( NumList: num_list ) + { + for Number: number in NumList + send Parser "[number( Number )]; " + } + + void stmt( Stmt: stmt ) + { + switch Stmt + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [A: static_array] { + StaticVarMap->insert( $A.ident, ' ' ) + send Parser + "let [A.ident] : int array = \[| + " [num_list(A.num_list)] + "|\] + } + case [V: static_value] { + StaticVarMap->insert( $V.ident, ' ' ) + send Parser + "let [V.ident] : [V.type] = [V.number] + } + case [ + 'if' O: `( IfExpr: expr C: `) IfStmt: stmt + ElseIfClauseList: else_if_clause* ElseClauseOpt: else_clause? + ] { + send Parser + "if [expr(IfExpr)] then + "begin + " [stmt(IfStmt)] + "end + + for ElseIfClause: else_if_clause in repeat( ElseIfClauseList ) { + match ElseIfClause + ['else if (' ElseIfExpr: expr ')' ElseIfStmt: stmt] + + send Parser + "else if [expr(ElseIfExpr)] then + "begin + " [stmt(ElseIfStmt)] + "end + } + + if ( match ElseClauseOpt ['else' ElseStmt: stmt] ) { + send Parser + "else + "begin + " [stmt(ElseStmt)] + "end + } + + send Parser + ";" + } + case ['while' '(' WhileExpr: expr ')' WhileStmt: stmt] { + send Parser + "while [expr(WhileExpr)] do + " [stmt(WhileStmt)] + "done; + } + case ['switch' '(' SwitchExpr: expr ')' '{' StmtList: stmt* '}'] { + require StmtList + [`case E1: expr `{ Inner: stmt* `} Rest: stmt*] + + send Parser + "if [expr(SwitchExpr)] = [expr(E1)] then + "begin + " [stmt_list(Inner)] + "end + + for S: stmt in repeat(Rest) { + switch S + case [`case E1: expr `{ Inner: stmt* `}] + { + send Parser + "else if [expr(SwitchExpr)] = [expr(E1)] then + "begin + " [stmt_list(Inner)] + "end + } + case + [`default `{ Inner: stmt* `}] + { + send Parser + "else + 
"begin + " [stmt_list(Inner)] + "end + } + } + + send Parser + "; + } + case [ExprExpr: expr Semi: `;] { + send Parser + [expr(ExprExpr) Semi] + } + case [L: `{ TL: stmt* R: `}] { + send Parser + "begin + "[stmt_list(TL)] + "end; + } + case [D: declaration] { + send Parser + "let [D.ident] : [type(D.type)] ref " + + switch D.opt_init + case [E: `= expr] { + send Parser + "= ref ( [expr(D.opt_init.expr)] )" + } + default { + send Parser + "= ref 0" + } + + send Parser + " in + } + case [Export: export_stmt] + { + send Parser + "#define [Export.ident] [number(Export.number)] + } + case ['fallthrough' ';'] + { + # Nothing needed here. + } + case [Index: index_stmt] + { + send Parser + "let [Index.ident] : int ref " + + switch Index.opt_init + case [E: `= expr] { + send Parser + "= ref ( [expr(Index.opt_init.expr)] )" + } + default { + send Parser + "= ref 0" + } + + send Parser + " in + } + case [case_block] + { + send Parser + "| [expr( Stmt.case_block.expr )] -> + "begin + "[stmt_list( Stmt.case_block._repeat_stmt )] + "end; + } + case [default_block] + { + send Parser + "| _ -> + "[stmt_list( Stmt.default_block._repeat_stmt )] + } + case [case_label] + { + send Parser + "case [expr( Stmt.case_label.expr )]: + } + case [AS: assign_stmt] + { + switch AS.assign_op + case [`=] + { + switch AS.LValue + case "stack\[[expr]\]" { + send Parser + "Array.set stack top.contents [expr(AS.expr)]; + } + case "nfa_bp\[[expr]\].state" { + send Parser + "Array.set nfa_bp_state nfa_len.contents [expr(AS.expr)]; + } + case "nfa_bp\[[expr]\].p" { + send Parser + "Array.set nfa_bp_p nfa_len.contents [expr(AS.expr)]; + } + default { + send Parser + "[lvalue(AS.LValue)] := [expr(AS.expr)]; + } + } + case [`+=] + { + parse RhsAsFactor: expr_factor [$AS.LValue] + send Parser + "[lvalue(AS.LValue)] := [expr_factor(RhsAsFactor)] + [expr(AS.expr)]; + } + case [`-=] + { + parse RhsAsFactor: expr_factor [$AS.LValue] + send Parser + "[lvalue(AS.LValue)] := [expr_factor(RhsAsFactor)] - 
[expr(AS.expr)]; + } + default { + send Parser + "[lvalue(AS.LValue) AS.assign_op expr(AS.expr)]; + } + } + default { + # catches unspecified cases + send Parser [Stmt] + } + } + + void stmt_list( StmtList: stmt* ) + { + for Stmt: stmt in repeat( StmtList ) + stmt( Stmt ) + } + + void trans( Output: stream, Start: start ) + { + Parser = new parser<ocaml_out::ocaml_out>() + + stmt_list( Start._repeat_stmt ) + + MO: ocaml_out::ocaml_out = Parser->finish() + + if MO { + send Output + [MO] + } + else { + send stderr + "failed to parse output: [Parser->error] + } + } + +end + +void trans( Output: stream, Start: start ) +{ + ml_gen::trans( Output, Start ) +} + +include 'rlhc-main.lm' diff --git a/ragel/host-ocaml/rlparse.lm b/ragel/host-ocaml/rlparse.lm new file mode 100644 index 00000000..fd0b2c29 --- /dev/null +++ b/ragel/host-ocaml/rlparse.lm @@ -0,0 +1,204 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + typeid<var_ref>, input->pull( match_length ) ) ) + } + else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. 
+ input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end + +end + + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any +end + +reduction TopLevel + + # Pass Through. + # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? string] :ImportDefStr + # | [ident whitespace? `= whitespace? number] :ImportAssignNum + # | [ident whitespace? `= whitespace? 
string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + + +export RagelError: str + +# File name. The open is expected to succeed. It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. 
+while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/host-ruby/.gitignore b/ragel/host-ruby/.gitignore new file mode 100644 index 00000000..c6a4e041 --- /dev/null +++ b/ragel/host-ruby/.gitignore @@ -0,0 +1,12 @@ +/Makefile.in +/Makefile +/.deps +/.libs +/ragel-ruby +/ragel-ruby.exe +/rlparse.c +/rlreduce.cc +/rlhc.c + +/CMakeFiles +/cmake_install.cmake diff --git a/ragel/host-ruby/CMakeLists.txt b/ragel/host-ruby/CMakeLists.txt new file mode 100644 index 00000000..4ec4e387 --- /dev/null +++ b/ragel/host-ruby/CMakeLists.txt @@ -0,0 +1,34 @@ +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS rlparse.lm + COMMAND colm::colm + ARGS -I.. -c -b rlparseRuby + -o "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + DEPENDS rlhc.lm + COMMAND colm::colm + ARGS -I.. 
-c -b rlhcRuby + -o "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + rlhc.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_executable(ragel-ruby main.cc + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + +target_link_libraries(ragel-ruby PRIVATE libragel libfsm) + +if(${PROJECT_NAME}_MAKE_INSTALL) + install(TARGETS ragel-ruby + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() diff --git a/ragel/host-ruby/Makefile.am b/ragel/host-ruby/Makefile.am new file mode 100644 index 00000000..a0d7312c --- /dev/null +++ b/ragel/host-ruby/Makefile.am @@ -0,0 +1,27 @@ +COLM = @COLM@ +COLM_LD = @COLM_LD@ +COLM_LIBDEP = @COLM_LIBDEP@ +COLM_BINDEP = @COLM_BINDEP@ + +bin_PROGRAMS = ragel-ruby + +ragel_ruby_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_ragel_ruby_SOURCES = main.cc rlhc.c + +nodist_ragel_ruby_SOURCES = \ + rlparse.c rlreduce.cc + +ragel_ruby_LDADD = ../libragel.la ../libfsm.la $(COLM_LD) + +BUILT_SOURCES = rlparse.c rlreduce.cc rlhc.c + +EXTRA_DIST = rlparse.lm rlhc.lm + +rlparse.c: rlparse.lm $(COLM_BINDEP) + $(COLM) -I.. -c -b rlparseRuby -o $@ -m rlreduce.cc $< + +rlreduce.cc: rlparse.c + +rlhc.c: rlhc.lm + $(COLM) -I.. 
-c -b rlhcRuby -o $@ $< diff --git a/ragel/host-ruby/main.cc b/ragel/host-ruby/main.cc new file mode 100644 index 00000000..3fed52e3 --- /dev/null +++ b/ragel/host-ruby/main.cc @@ -0,0 +1,58 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inputdata.h" + +extern struct colm_sections rlparseRuby; +extern struct colm_sections rlhcRuby; + +/* What are the appropriate types for ruby? */ +static HostType hostTypesRuby[] = +{ + { "char", 0, "char", true, true, false, CHAR_MIN, CHAR_MAX, 0, 0, 1 }, + { "int", 0, "int", true, true, false, INT_MIN, INT_MAX, 0, 0, 4 }, +}; + +const char *defaultOutFnRuby( const char *inputFileName ) +{ + return fileNameFromStem( inputFileName, ".rb" ); +} + +static const HostLang hostLangRuby = +{ + hostTypesRuby, + 2, + 0, + false, + false, /* loopLabels. 
*/ + Translated, + BreakFeature, + &makeCodeGen, + &defaultOutFnRuby, + &genLineDirectiveTrans +}; + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangRuby, &rlparseRuby, &rlhcRuby ); + return id.rlhcMain( argc, argv ); +} diff --git a/ragel/host-ruby/rlhc.lm b/ragel/host-ruby/rlhc.lm new file mode 100644 index 00000000..f2a4800f --- /dev/null +++ b/ragel/host-ruby/rlhc.lm @@ -0,0 +1,524 @@ +include 'ril.lm' + +namespace ruby_out + token _IN_ /''/ + token _EX_ /''/ + + lex + token comment / + '#' any* :> '\n' + / + + literal `def `class `begin `end `while `if + + token id + /[a-zA-Z_][a-zA-Z_0-9]*/ + + token number / + [0-9]+ + / + + token string / + '"' ( [^"\\] | '\\' any ) * '"' | + "'" ( [^'\\] | '\\' any ) * "'" | + "/" ( [^/\\] | '\\' any ) * "/" + / + + token symbol / + '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | + '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | + '=' | '>' | '?' | '@' | '[' | ']' | '^' | '|' | + '~' | '{' | '}' | '\\' / + + ignore + /[ \t\v\r\n]+/ + end + + def item + [comment] + | [id] + | [number] + | [symbol] + | [string] + | [`begin _IN_] + | [`class _IN_] + | [`while _IN_] + | [`if _IN_] + | [`def _IN_] + | [_EX_ `end] + + def ruby_out + [_IN_ _EX_ item*] +end + +global Parser: parser<ruby_out::ruby_out> + +void tok_list( TL: host::tok* ) +{ + for Tok: host::tok in repeat(TL) { + switch Tok + case [host::`${ StmtList: stmt* host::`}$] + { + send Parser + "begin + "[stmt_list( StmtList )] + "end + } + case [host::`={ Expr: expr host::`}=] + expr( Expr ) + case [E: escape] { + Str: str = $E + send Parser + "[Str.suffix( 1 )]" + } + default { + send Parser [Tok] + } + } +} + +void embedded_host( EH: embedded_host ) +{ + switch EH + case [`host `( string `, uint `) `={ TL: host::tok* host::`}=] + { + send Parser + "([tok_list( TL )])" + } + case [`host `( string `, uint `) `${ TL: host::tok* host::`}$] + { + send Parser + "begin + " [tok_list( TL )] + "end + } + case [`host `( string `, uint `) `@{ TL: host::tok* 
host::`}@] + { + send Parser + [tok_list( TL )] + } +} + +void expr_factor( ExprFactor: expr_factor ) +{ + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [O:`( TL: expr C: `)] + { + send Parser + [O expr(TL) C] + } + case [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr( TL ) C] + } + case ['offset' '(' ident ',' expr ')'] + { + send Parser + [expr( ExprFactor.expr )] + } + case ['deref' '(' ident ',' expr ')'] + { + send Parser + [ ExprFactor.ident '[' expr( ExprFactor.expr ) ']'] + if $ExprFactor.ident == 'data' + send Parser '.ord' + } + case [T: `TRUE] + { + T.data = 'true' + send Parser [T] + } + case [F: `FALSE] + { + F.data = 'false' + send Parser [F] + } + case [N: `nil] + { + N.data = '0' + send Parser [N] + } + case [Number: number] + { + ruby_number( Number ) + } + case [E1: embedded_host `-> E2: expr_factor] + { + embedded_host( E1 ) + expr_factor( E2 ) + } + case [`cast Open: `( Type: type Close: `) expr_factor] + { + #send Parser [Open] + #type( Type ) + #send Parser [Close] + expr_factor( ExprFactor._expr_factor ) + } + case [I: ident `[ E: expr `] `. F: ident] { + send Parser + [^I '_' ^F '[' E ']'] + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } +} + +void lvalue( ExprFactor: lvalue ) +{ + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr( TL ) C] + } + case [E1: embedded_host `-> E2: lvalue] + { + embedded_host( E1 ) + lvalue( E2 ) + } + case [I: ident `[ E: expr `] `. F: ident] { + send Parser + [^I '_' ^F '[' E ']'] + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } +} + +void expr_factor_op( ExprFactorOp: expr_factor_op ) +{ + switch ExprFactorOp + case [B: `! 
expr_factor_op] + { + send Parser [B] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [T: `~ expr_factor_op] + { + send Parser [T] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [expr_factor] + { + expr_factor( ExprFactorOp.expr_factor ) + } +} + +void expr_bitwise( ExprBitwise: expr_bitwise ) +{ + switch ExprBitwise + case [expr_bitwise A: `& expr_factor_op] + { + expr_bitwise( ExprBitwise._expr_bitwise ) + send Parser [A] + expr_factor_op( ExprBitwise.expr_factor_op ) + } + case [expr_factor_op] + { + expr_factor_op( ExprBitwise.expr_factor_op ) + } +} + +void expr_mult( ExprMult: expr_mult ) +{ + switch ExprMult + case [expr_mult T: `* expr_bitwise] + { + expr_mult( ExprMult._expr_mult ) + send Parser [T] + expr_bitwise( ExprMult.expr_bitwise ) + } + case [expr_bitwise] + { + expr_bitwise( ExprMult.expr_bitwise ) + } +} + +void expr_add( ExprAdd: expr_add ) +{ + switch ExprAdd + case [expr_add Op: add_op expr_mult] + { + expr_add( ExprAdd._expr_add ) + send Parser [Op] + expr_mult( ExprAdd.expr_mult ) + } + case [expr_mult] + { + expr_mult( ExprAdd.expr_mult ) + } +} + +void expr_shift( ExprShift: expr_shift ) +{ + switch ExprShift + case [expr_shift Op: shift_op expr_add] + { + expr_shift( ExprShift._expr_shift ) + send Parser [Op] + expr_add( ExprShift.expr_add ) + } + case [expr_add] + { + expr_add( ExprShift.expr_add ) + } +} + +void expr_test( ExprTest: expr_test ) +{ + switch ExprTest + case [expr_test Op: test_op expr_shift] + { + expr_test( ExprTest._expr_test ) + send Parser [Op] + expr_shift( ExprTest.expr_shift ) + } + case [expr_shift] + { + expr_shift( ExprTest.expr_shift ) + } +} + +void expr( Expr: expr ) +{ + expr_test( Expr.expr_test ) +} + +void type( Type: type ) +{ + switch Type + case "s8" + send Parser ['byte '] + case "s16" + send Parser ['short '] + case "s32" + send Parser ['int '] + case "s64" + send Parser ['long '] + case "s128" + send Parser ['long long '] + case "uint" + send Parser ['int '] + default + send 
Parser [Type] +} + +void ruby_number( Number: number ) +{ + switch Number + case [`u `( uint `) ] + send Parser "[Number.uint]" + default + send Parser [Number] +} + +void ruby_num_list( NumList: num_list ) +{ + for Number: number in NumList + send Parser "[ruby_number( Number )], " +} + +void stmt( Stmt: stmt ) +{ + switch Stmt + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [A: static_array] { + send Parser + "class << self + " attr_accessor :[ A.ident ] + " private :[ A.ident ], :[ A.ident ]= + "end + "self.[ A.ident ] = \[ + " [ruby_num_list( A.num_list )] + "\] + " + } + case [V: static_value] { + send Parser + "class << self + " attr_accessor :[ V.ident ] + "end + "self.[ V.ident ] = [ V.number ]; + " + } + case [ + 'if' O: `( IfExpr: expr C: `) IfStmt: stmt + ElseIfClauseList: else_if_clause* ElseClauseOpt: else_clause? + ] { + send Parser + "if ( [expr(IfExpr)] ) + " [stmt(IfStmt)] + + for ElseIfClause: else_if_clause in repeat( ElseIfClauseList ) { + match ElseIfClause + ['else if (' ElseIfExpr: expr ')' ElseIfStmt: stmt] + + send Parser + "elsif ( [expr(ElseIfExpr)] ) + " [stmt(ElseIfStmt)] + } + + if ( match ElseClauseOpt ['else' ElseStmt: stmt] ) { + send Parser + "else + " [stmt(ElseStmt)] + } + send Parser + "end + } + case ['while' '(' WhileExpr: expr ')' WhileStmt: stmt] { + send Parser + "while ( [expr(WhileExpr)] ) + " [stmt(WhileStmt)] + "end + } + case ['switch' '(' SwitchExpr: expr ')' '{' StmtList: stmt* '}'] { + send Parser + "case [expr(SwitchExpr)] + "when -2 then + "begin + " [stmt_list(StmtList)] + "end + "end + } + case [ExprExpr: expr Semi: `;] { + send Parser + [expr(ExprExpr) Semi] + } + case [L: `{ TL: stmt* R: `}] { + send Parser + "begin + "[stmt_list(TL)] + "end + } + # [declaration] + case [ + TypeList: opt_const Type: type + Ident: ident OptInit: opt_init Semi: `; + ] + { + send Parser + [Ident] + + if match OptInit [E: `= expr] { + send Parser + [E expr(OptInit.expr)] + } + else { + send Parser + 
"= 0 + } + + send Parser + [Semi] + } + case [Export: export_stmt] + { + send Parser + "class << self + " attr_accessor :[ Export.ident ] + "end + "self.[ Export.ident ] = [ ruby_number(Export.number) ]; + " + } + case ['fallthrough' ';'] + { + # Nothing needed here. + } + case [Index: index_stmt] + { + send Parser + "[Index.ident]" + + if match Index.opt_init [E: `= expr] { + send Parser + [E expr(Index.opt_init.expr)] + } + else { + send Parser + "= 0 + } + + send Parser "; + } + case [case_block] + { + send Parser + "end + "when [expr( Stmt.case_block.expr )] then + "begin + "[stmt_list( Stmt.case_block._repeat_stmt )] + } + case [default_block] + { + send Parser + "end + "else + "begin + "[stmt_list( Stmt.default_block._repeat_stmt )] + } + case [goto_label] {} + case [goto_stmt] {} + case [AS: assign_stmt] + { + send Parser + "[lvalue(AS.LValue) AS.assign_op expr(AS.expr)]; + } + case [continue_stmt] + { + send Parser + "next; + } + default { + # catches unspecified cases + send Parser [Stmt] + } +} + +void stmt_list( StmtList: stmt* ) +{ + for Stmt: stmt in repeat( StmtList ) + stmt( Stmt ) +} + +void ruby_trans( Output: stream, Start: start ) +{ + Parser = new parser<ruby_out::ruby_out>() + + stmt_list( Start._repeat_stmt ) + + RO: ruby_out::ruby_out = Parser->finish() + + if RO { + send Output + [RO] + } + else { + send stderr + "failed to parse output: [Parser->error] + } +} + +void trans( Output: stream, Start: start ) +{ + ruby_trans( Output, Start ) +} + +include 'rlhc-main.lm' diff --git a/ragel/host-ruby/rlparse.lm b/ragel/host-ruby/rlparse.lm new file mode 100644 index 00000000..9570244c --- /dev/null +++ b/ragel/host-ruby/rlparse.lm @@ -0,0 +1,203 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' 
[0-9a-fA-F]+/ + + token comment + / ruby_comment / + + token string + / s_literal | d_literal | host_re_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + typeid<var_ref>, input->pull( match_length ) ) ) + } + else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. + input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end + +end + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / ruby_comment / + + token string + / s_literal | d_literal | host_re_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? 
string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any + +end + +reduction TopLevel + + # Pass Through. + # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? string] :ImportDefStr + # | [ident whitespace? `= whitespace? number] :ImportAssignNum + # | [ident whitespace? `= whitespace? string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + +export RagelError: str + +# File name. The open is expected to succeed. 
It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. +while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/host-ruby/ruby.dsc b/ragel/host-ruby/ruby.dsc new file mode 100644 index 00000000..75342663 --- /dev/null +++ b/ragel/host-ruby/ruby.dsc @@ -0,0 +1,2 @@ +char NIL char true true false CHAR_MIN CHAR_MAX 0 0 1 +int NIL int true true false INT_MIN INT_MAX 0 0 4 diff --git a/ragel/host-rust/.gitignore b/ragel/host-rust/.gitignore new file mode 100644 index 00000000..e3f22bc7 --- /dev/null +++ b/ragel/host-rust/.gitignore @@ -0,0 +1,12 @@ +/Makefile +/Makefile.in +/.deps +/.libs +/ragel-rust +/ragel-rust.exe +/rlhc.c +/rlparse.c +/rlreduce.cc + +/CMakeFiles +/cmake_install.cmake diff --git a/ragel/host-rust/CMakeLists.txt b/ragel/host-rust/CMakeLists.txt new file mode 100644 index 00000000..272ef9f6 --- /dev/null +++ b/ragel/host-rust/CMakeLists.txt @@ -0,0 +1,34 @@ +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS rlparse.lm + COMMAND colm::colm + ARGS -I.. -c -b rlparseRust + -o "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + DEPENDS rlhc.lm + COMMAND colm::colm + ARGS -I.. 
-c -b rlhcRust + -o "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + rlhc.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") + +add_executable(ragel-rust main.cc + "${CMAKE_CURRENT_BINARY_DIR}/rlhc.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlparse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + +target_link_libraries(ragel-rust PRIVATE libragel libfsm) + +if(${PROJECT_NAME}_MAKE_INSTALL) + install(TARGETS ragel-rust + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() diff --git a/ragel/host-rust/Makefile.am b/ragel/host-rust/Makefile.am new file mode 100644 index 00000000..235c80d7 --- /dev/null +++ b/ragel/host-rust/Makefile.am @@ -0,0 +1,27 @@ +COLM = @COLM@ +COLM_LD = @COLM_LD@ +COLM_LIBDEP = @COLM_LIBDEP@ +COLM_BINDEP = @COLM_BINDEP@ + +bin_PROGRAMS = ragel-rust + +ragel_rust_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_ragel_rust_SOURCES = main.cc rlhc.c + +nodist_ragel_rust_SOURCES = \ + rlparse.c rlreduce.cc + +ragel_rust_LDADD = ../libragel.la ../libfsm.la $(COLM_LD) + +BUILT_SOURCES = rlparse.c rlreduce.cc rlhc.c + +EXTRA_DIST = rlparse.lm rlhc.lm + +rlparse.c: rlparse.lm $(COLM_BINDEP) + $(COLM) -I.. -c -b rlparseRust -o $@ -m rlreduce.cc $< + +rlreduce.cc: rlparse.c + +rlhc.c: rlhc.lm + $(COLM) -I.. 
-c -b rlhcRust -o $@ $< diff --git a/ragel/host-rust/main.cc b/ragel/host-rust/main.cc new file mode 100644 index 00000000..93c5147d --- /dev/null +++ b/ragel/host-rust/main.cc @@ -0,0 +1,60 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inputdata.h" + +extern struct colm_sections rlparseRust; +extern struct colm_sections rlhcRust; + +/* + * Rust + */ +const char *defaultOutFnRust( const char *inputFileName ) +{ + return fileNameFromStem( inputFileName, ".rs" ); +} + +HostType hostTypesRust[] = +{ + { "u8", 0, "byte", true, true, false, 0, UCHAR_MAX, 0, 0, 4 }, +}; + +const HostLang hostLangRust = +{ + hostTypesRust, + 1, + 0, + false, + false, /* loopLabels */ + Translated, + BreakFeature, + &makeCodeGen, + &defaultOutFnRust, + &genLineDirectiveTrans +}; + + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangRust, &rlparseRust, &rlhcRust ); + return id.rlhcMain( argc, argv ); +} diff --git a/ragel/host-rust/rlhc.lm b/ragel/host-rust/rlhc.lm new file mode 100644 index 00000000..61daa84f --- /dev/null +++ b/ragel/host-rust/rlhc.lm @@ -0,0 +1,516 @@ +include 'ril.lm' + +namespace rust_out + token _IN_ /''/ + token _EX_ /''/ + + lex + token comment / + '//' any* :> '\n' | + '/*' any* :>> '*/' + / + + token id + /[a-zA-Z_][a-zA-Z_0-9]*/ + + token number / + [0-9]+ + / + + token symbol / + '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | + '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | + '=' | '>' | '?' 
| '@' | '[' | ']' | '^' | '|' | + '~' / + + literal `{ `} + + token string / + '"' ( [^"\\] | '\\' any ) * '"' | + "'" ( [^'\\] | '\\' any ) * "'" + / + + ignore + /[ \t\v\r\n]+/ + end + + def item + [comment] + | [id] + | [number] + | [symbol] + | [string] + | [`{ _IN_ item* _EX_ `} ] + + def rust_out + [_IN_ _EX_ item*] +end + +namespace rust_gen + + global Parser: parser<rust_out::rust_out> + + void tok_list( TL: host::tok* ) + { + for Tok: host::tok in repeat(TL) { + switch Tok + case [host::`${ StmtList: stmt* host::`}$] + send Parser "{[stmt_list( StmtList )]}" + case [host::`={ Expr: expr host::`}=] + send Parser "([expr( Expr )])" + case [E: escape] { + Str: str = $E + send Parser + "[Str.suffix( 1 )]" + } + default + send Parser [Tok] + } + } + + void embedded_host( EmbeddedHost: embedded_host ) + { + switch EmbeddedHost + case [`host `( string `, uint `) `={ TL: host::tok* host::`}=] + { + send Parser + "([tok_list( TL )])" + } + case [`host `( string `, uint `) `${ TL: host::tok* host::`}$] + { + send Parser + "{[tok_list( TL )]} + } + case [`host `( string `, uint `) `@{ TL: host::tok* host::`}@] + { + send Parser + [tok_list( TL )] + } + } + + void expr_factor( ExprFactor: expr_factor ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [O:`( TL: expr C: `)] + { + send Parser + [O expr(TL) C] + } + case [I: ident `[ E: expr `]] + { + send Parser + "[I]\[([expr( E )]) as usize\]" + } + case ['offset' '(' ident ',' expr ')'] + { + send Parser + "( [expr( ExprFactor.expr )] ) as i32" + } + case ['deref' '(' I: ident ',' E: expr ')'] + { + send Parser + "[I]\[([expr( E )]) as usize\] + } + case [T: `TRUE] + { + T.data = 'true' + send Parser [T] + } + case [F: `FALSE] + { + F.data = 'false' + send Parser [F] + } + case [N: `nil] + { + N.data = '0' + send Parser [N] + } + case [Number: number] + { + number( Number ) + } + case [E1: embedded_host `-> E2: expr_factor] + { + # The accessor operator is 
contained wihtin the lhs. + embedded_host( E1 ) + expr_factor( E2 ) + } + case [`cast `( T: type `) E: expr_factor] + { + send Parser + "( [expr_factor( E )] ) as [type(T)]" + } + case [I: ident `[ E: expr `] `. F: ident] { + send Parser + [^I '_' ^F '[' expr(E) ' as usize]'] + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void lvalue( ExprFactor: lvalue ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [I: ident O: `[ E: expr C: `]] + { + send Parser + "[I]\[([expr( E )]) as usize\] + } + case [E1: embedded_host `-> E2: lvalue] + { + # The accessor operator is contained wihtin the lhs. + embedded_host( E1 ) + lvalue( E2 ) + } + case [I: ident `[ E: expr `] `. F: ident] { + send Parser + [^I '_' ^F '[' expr(E) ' as usize]'] + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void expr_factor_op( ExprFactorOp: expr_factor_op ) + { + switch ExprFactorOp + case [B: `! 
expr_factor_op] + { + send Parser [B] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [`~ EFO: expr_factor_op] + { + send Parser + "![expr_factor_op( EFO )] + } + case [expr_factor] + { + expr_factor( ExprFactorOp.expr_factor ) + } + } + + void expr_bitwise( ExprBitwise: expr_bitwise ) + { + switch ExprBitwise + case [expr_bitwise A: `& expr_factor_op] + { + expr_bitwise( ExprBitwise._expr_bitwise ) + send Parser [A] + expr_factor_op( ExprBitwise.expr_factor_op ) + } + case [expr_factor_op] + { + expr_factor_op( ExprBitwise.expr_factor_op ) + } + } + + void expr_mult( ExprMult: expr_mult ) + { + switch ExprMult + case [expr_mult T: `* expr_bitwise] + { + expr_mult( ExprMult._expr_mult ) + send Parser [T] + expr_bitwise( ExprMult.expr_bitwise ) + } + case [expr_bitwise] + { + expr_bitwise( ExprMult.expr_bitwise ) + } + } + + void expr_add( ExprAdd: expr_add ) + { + switch ExprAdd + case [expr_add Op: add_op expr_mult] + { + expr_add( ExprAdd._expr_add ) + send Parser [Op] + expr_mult( ExprAdd.expr_mult ) + } + case [expr_mult] + { + expr_mult( ExprAdd.expr_mult ) + } + } + + void expr_shift( ExprShift: expr_shift ) + { + switch ExprShift + case [expr_shift Op: shift_op expr_add] + { + expr_shift( ExprShift._expr_shift ) + send Parser [Op] + expr_add( ExprShift.expr_add ) + } + case [expr_add] + { + expr_add( ExprShift.expr_add ) + } + } + + void expr_test( ExprTest: expr_test ) + { + switch ExprTest + case [expr_test Op: test_op expr_shift] + { + expr_test( ExprTest._expr_test ) + send Parser [Op] + expr_shift( ExprTest.expr_shift ) + } + case [expr_shift] + { + expr_shift( ExprTest.expr_shift ) + } + } + + void expr( Expr: expr ) + { + expr_test( Expr.expr_test ) + } + + void type( Type: type ) + { + switch Type + case "s8" + send Parser ['i8 '] + case "s16" + send Parser ['i16 '] + case "s32" + send Parser ['i32 '] + case "s64" + send Parser ['i64 '] + case "s128" + send Parser ['i128'] + case "int" + send Parser ['i32'] + case "uint" + send Parser 
['u32'] + default + send Parser [Type] + } + + void number( Number: number ) + { + switch Number + case [`u `( uint `) ] + send Parser "[Number.uint]u" + default + send Parser [Number] + } + + void num_list( NumList: num_list ) + { + for Number: number in NumList + send Parser "[number( Number )], " + send Parser "0" + } + + void stmt( Stmt: stmt ) + { + switch Stmt + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [A: static_array] { + Length: int = 1 + for Number: number in A.num_list + Length = Length + 1 + + send Parser + "static [A.ident]: \[[type(A.type)]; [Length]\] = \[ [num_list(A.num_list)] \]; + } + case [V: static_value] { + send Parser + "static [V.ident]: i32 = [V.number]; + } + case [D: declaration] { + send Parser + "let mut [D.ident] " + + switch D.opt_init + case [E: `= expr] { + send Parser + "= [expr(D.opt_init.expr)]; + } + default { + send Parser + "= 0; + } + } + case [Index: index_stmt] + { + send Parser + "let mut [Index.ident] :i32" + + switch Index.opt_init + case [E: `= expr] { + send Parser + "= [expr(Index.opt_init.expr)]; + } + default { + send Parser + "= 0; + } + } + case [ + 'if' `( IfExpr: expr `) IfStmt: stmt + ElseIfClauseList: else_if_clause* ElseClauseOpt: else_clause? 
+ ] { + send Parser + "if ( [expr(IfExpr)] ) { + " [stmt(IfStmt)] + "} + + for ElseIfClause: else_if_clause in repeat( ElseIfClauseList ) { + match ElseIfClause + ['else if (' ElseIfExpr: expr ')' ElseIfStmt: stmt] + + send Parser + "else if ( [expr(ElseIfExpr)] ) { + " [stmt(ElseIfStmt)] + "} + } + + if ( match ElseClauseOpt ['else' ElseStmt: stmt] ) { + send Parser + "else { + " [stmt(ElseStmt)] + "} + } + } + case ['while' '(' WhileExpr: expr ')' WhileStmt: stmt] { + send Parser + "while ( [expr(WhileExpr)] ) + " [stmt(WhileStmt)] + } + case [`switch `( SwitchExpr: expr `) `{ StmtList: stmt* `}] { + send Parser + "match ( [expr(SwitchExpr)] ) { + " [stmt_list(StmtList)] + + NeedDef: bool = true + for Stmt: stmt in repeat(StmtList) { + if match Stmt [default_block] + NeedDef = false + } + + if NeedDef { + send Parser + " _ => {} + } + + send Parser + "} + } + case [ExprExpr: expr Semi: `;] { + send Parser + [expr(ExprExpr) Semi] + } + case [L: `{ TL: stmt* R: `}] { + send Parser + [L stmt_list(TL) R] + } + case [ + TypeList: opt_const Type: type + Ident: ident OptInit: opt_init Semi: `; + ] + { + send Parser + [TypeList type(Type) Ident] + + if match OptInit [E: `= expr] { + send Parser + [E expr(OptInit.expr)] + } + + send Parser + [Semi] + } + case [Export: export_stmt] + { + send Parser + "#define [Export.ident] [number(Export.number)] + } + case ['fallthrough' ';'] + { + # Nothing needed here. 
+ } + case [case_block] + { + send Parser + "[expr( Stmt.case_block.expr )] => { + "[stmt_list( Stmt.case_block._repeat_stmt )] + "} + } + case [default_block] + { + send Parser + "_ => { + "[stmt_list( Stmt.default_block._repeat_stmt )] + "} + } + case [case_label] + { + send Parser + "case [expr( Stmt.case_label.expr )]: + } + case [AS: assign_stmt] + { + send Parser + "[lvalue(AS.LValue) AS.assign_op expr(AS.expr)]; + } + default { + # catches unspecified cases + send Parser [Stmt] + } + } + + void stmt_list( StmtList: stmt* ) + { + for Stmt: stmt in repeat( StmtList ) + stmt( Stmt ) + } + + void trans( Output: stream, Start: start ) + { + Parser = new parser<rust_out::rust_out>() + + stmt_list( Start._repeat_stmt ) + + CO: rust_out::rust_out = Parser->finish() + + if CO { + send Output + [CO] + } + else { + send stderr + "failed to parse output: [Parser->error] + } + } +end + +void trans( Output: stream, Start: start ) +{ + rust_gen::trans( Output, Start ) +} + +include 'rlhc-main.lm' diff --git a/ragel/host-rust/rlparse.lm b/ragel/host-rust/rlparse.lm new file mode 100644 index 00000000..db7851c7 --- /dev/null +++ b/ragel/host-rust/rlparse.lm @@ -0,0 +1,202 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + typeid<var_ref>, input->pull( match_length ) ) ) + } + else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. 
+ input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end + +end + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any +end + +reduction TopLevel + + # Pass Through. + # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? string] :ImportDefStr + # | [ident whitespace? `= whitespace? number] :ImportAssignNum + # | [ident whitespace? `= whitespace? 
string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + +export RagelError: str + +# File name. The open is expected to succeed. It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. 
+while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/idbase.cc b/ragel/idbase.cc new file mode 100644 index 00000000..c4daa344 --- /dev/null +++ b/ragel/idbase.cc @@ -0,0 +1,422 @@ +/* + * Copyright 2008-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "ragel.h" +#include "fsmgraph.h" +#include "parsedata.h" + +/* Error reporting format. */ +ErrorFormat errorFormat = ErrorFormatGNU; + +void FsmCtx::finalizeInstance( FsmAp *graph ) +{ + /* Resolve any labels that point to multiple states. Any labels that are + * still around are referenced only by gotos and calls and they need to be + * made into deterministic entry points. */ + graph->deterministicEntry(); + + /* + * All state construction is now complete. 
+ */ + + /* Transfer actions from the out action tables to eof action tables. */ + for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ ) + graph->transferOutActions( *state ); + + /* Transfer global error actions. */ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) + graph->transferErrorActions( state, 0 ); + + if ( fsmGbl->wantDupsRemoved ) + graph->removeActionDups(); + + /* Remove unreachable states. There should be no dead end states. The + * subtract and intersection operators are the only places where they may + * be created and those operators clean them up. */ + graph->removeUnreachableStates(); + + /* No more fsm operations are to be done. Action ordering numbers are + * no longer of use and will just hinder minimization. Clear them. */ + graph->nullActionKeys(); + + /* Transition priorities are no longer of use. We can clear them + * because they will just hinder minimization as well. Clear them. */ + graph->clearAllPriorities(); + + if ( graph->ctx->minimizeOpt != MinimizeNone ) { + /* Minimize here even if we minimized at every op. Now that function + * keys have been cleared we may get a more minimal fsm. */ + switch ( graph->ctx->minimizeLevel ) { + #ifdef TO_UPGRADE_CONDS + case MinimizeApprox: + graph->minimizeApproximate(); + break; + #endif + #ifdef TO_UPGRADE_CONDS + case MinimizeStable: + graph->minimizeStable(); + break; + #endif + case MinimizePartition1: + graph->minimizePartition1(); + break; + case MinimizePartition2: + graph->minimizePartition2(); + break; + } + } + + graph->compressTransitions(); + + createNfaActions( graph ); +} + +void FsmCtx::analyzeAction( Action *action, InlineList *inlineList ) +{ + /* FIXME: Actions used as conditions should be very constrained. 
*/ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr || + item->type == InlineItem::Ncall || item->type == InlineItem::NcallExpr ) + { + action->anyCall = true; + } + + /* Need to recurse into longest match items. */ + if ( item->type == InlineItem::LmSwitch ) { + LongestMatch *lm = item->longestMatch; + for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) { + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + } + + if ( item->type == InlineItem::LmOnLast || + item->type == InlineItem::LmOnNext || + item->type == InlineItem::LmOnLagBehind ) + { + LongestMatchPart *lmi = item->longestMatchPart; + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + + if ( item->children != 0 ) + analyzeAction( action, item->children ); + } +} + + +/* Check actions for bad uses of fsm directives. We don't go inside longest + * match items in actions created by ragel, since we just want the user + * actions. */ +void FsmCtx::checkInlineList( Action *act, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* EOF checks. */ + if ( act->numEofRefs > 0 ) { + switch ( item->type ) { + /* Currently no checks. */ + default: + break; + } + } + + /* Recurse. */ + if ( item->children != 0 ) + checkInlineList( act, item->children ); + } +} + +void FsmCtx::checkAction( Action *action ) +{ + /* Check for actions with calls that are embedded within a longest match + * machine. 
*/ + if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) { + for ( NameInstVect::Iter ar = action->embedRoots; ar.lte(); ar++ ) { + NameInst *check = *ar; + while ( check != 0 ) { + if ( check->isLongestMatch ) { + fsmGbl->error(action->loc) << "within a scanner, fcall and fncall are permitted" + " only in pattern actions" << endl; + break; + } + check = check->parent; + } + } + } + + checkInlineList( action, action->inlineList ); +} + +void FsmCtx::analyzeGraph( FsmAp *graph ) +{ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + analyzeAction( act, act->inlineList ); + + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + /* The transition list. */ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + //if ( trans->condSpace != 0 ) { + // for ( CondSet::Iter sci = trans->condSpace->condSet; sci.lte(); sci++ ) + // (*sci)->numCondRefs += 1; + //} + + if ( trans->plain() ) { + for ( ActionTable::Iter at = trans->tdap()->actionTable; at.lte(); at++ ) + at->value->numTransRefs += 1; + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + for ( ActionTable::Iter at = cond->actionTable; at.lte(); at++ ) + at->value->numTransRefs += 1; + } + } + } + + for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) + at->value->numToStateRefs += 1; + + for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) + at->value->numFromStateRefs += 1; + + for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) + at->value->numEofRefs += 1; + + //for ( OutCondSet::Iter oci = st->outCondSet; oci.lte(); oci++ ) + // oci->action->numCondRefs += 1; + + if ( st->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *st->nfaOut; n.lte(); n++ ) { + for ( ActionTable::Iter ati = n->pushTable; ati.lte(); ati++ ) + ati->value->numNfaRefs += 1; + + for ( ActionTable::Iter ati = n->restoreTable; ati.lte(); ati++ ) + ati->value->numNfaRefs += 1; + + for ( 
ActionTable::Iter ati = n->popAction; ati.lte(); ati++ ) + ati->value->numNfaRefs += 1; + + for ( ActionTable::Iter ati = n->popTest; ati.lte(); ati++ ) + ati->value->numNfaRefs += 1; + } + } + } + + /* Can't count on cond references in transitions, since we don't refcount + * the spaces. FIXME: That would be the proper solution. */ + for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) { + for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) + (*csi)->numCondRefs += 1; + } + + /* Checks for bad usage of directives in action code. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + checkAction( act ); +} + +/* This creates an action that refs the original embed roots, if the optWrap arg + * is supplied. The new action takes the next cond id, gets a placeholder + * location (file "NONE", line 1, col 1) and is appended to actionList. */ +Action *FsmCtx::newNfaWrapAction( const char *name, InlineList *inlineList, Action *optWrap ) +{ + InputLoc loc; + loc.line = 1; + loc.col = 1; + loc.fileName = "NONE"; + + Action *action = new Action( loc, name, inlineList, nextCondId++ ); + + if ( optWrap != 0 ) + action->embedRoots.append( optWrap->embedRoots ); + + actionList.append( action ); + return action; +} + +void FsmCtx::createNfaActions( FsmAp *fsm ) +{ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + if ( st->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *st->nfaOut; n.lte(); n++ ) { + /* Move pop restore actions into poptest. These are set directly + * under their own keys. NOTE(review): unlike the loops below, no + * wrapping action is created here -- confirm that is intended. */ + for ( ActionTable::Iter ati = n->restoreTable; ati.lte(); ati++ ) { + n->popTest.setAction( ati->key, ati->value ); + } + + /* Move popFrom actions into pop test. Wrap to override the + * condition-like testing. Every wrapper is set under ORD_COND2. */ + for ( ActionTable::Iter ati = n->popFrom; ati.lte(); ati++ ) { + + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), + ati->value, InlineItem::NfaWrapAction ) ); + Action *wrap = newNfaWrapAction( "action_wrap", il1, ati->value ); + n->popTest.setAction( ORD_COND2, wrap ); + } + + /* Move condition evaluation into pop test. 
Wrap with condition + * execution. */ + if ( n->popCondSpace != 0 ) { + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), + n->popCondSpace, n->popCondKeys, + InlineItem::NfaWrapConds ) ); + Action *wrap = newNfaWrapAction( "cond_wrap", il1, 0 ); + n->popTest.setAction( ORD_COND, wrap ); + } + + /* Move pop actions into pop test. Wrap to override the + * condition-like testing. */ + for ( ActionTable::Iter ati = n->popAction; ati.lte(); ati++ ) { + + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), + ati->value, InlineItem::NfaWrapAction ) ); + Action *wrap = newNfaWrapAction( "action_wrap", il1, ati->value ); + n->popTest.setAction( ati->key, wrap ); + } + } + } + } +} + +void FsmCtx::prepareReduction( FsmAp *sectionGraph ) +{ + /* Decide if an error state is necessary. + * 1. There is an error transition + * 2. There is a gap in the transitions + * 3. The longest match operator requires it. */ + if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() ) + sectionGraph->errState = sectionGraph->addState(); + + /* State numbers need to be assigned such that all final states have a + * larger state id number than all non-final states. This enables the + * first_final mechanism to function correctly. We also want states to be + * ordered in a predictable fashion. So we first apply a depth-first + * search, then do a stable sort by final state status, then assign + * numbers. */ + + sectionGraph->depthFirstOrdering(); + sectionGraph->sortStatesByFinal(); + sectionGraph->setStateNumbers( 0 ); +} + + +void translatedHostData( ostream &out, const std::string &data ) +{ + const char *p = data.c_str(); + for ( const char *c = p; *c != 0; ) { + if ( c[0] == '}' && ( c[1] == '@' || c[1] == '$' || c[1] == '=' ) ) { + out << "@}@" << c[1]; + c += 2; + } + else if ( c[0] == '@' ) { + out << "@@"; + c += 1; + } + // Have some escaping issues that these fix, but they lead to other problems. 
+ // Can be reproduced by passing "={}" through ragel and adding --colm-backend + // else if ( c[0] == '=' ) { + // out << "@="; + // c += 1; + //} + // else if ( c[0] == '$' ) { + // out << "@$"; + // c += 1; + //} + else { + out << c[0]; + c += 1; + } + } +} + + +void FsmGbl::abortCompile( int code ) +{ + throw AbortCompile( code ); +} + +/* Print the opening to a warning in the input, then return the error ostream. */ +ostream &FsmGbl::warning( const InputLoc &loc ) +{ + ostream &err = std::cerr; + err << loc << ": warning: "; + return err; +} + +/* Print the opening to a program error, then return the error stream. */ +ostream &FsmGbl::error() +{ + errorCount += 1; + ostream &err = std::cerr; + err << PROGNAME ": "; + return err; +} + +ostream &FsmGbl::error( const InputLoc &loc ) +{ + errorCount += 1; + ostream &err = std::cerr; + err << loc << ": "; + return err; +} + +ostream &FsmGbl::error_plain() +{ + errorCount += 1; + ostream &err = std::cerr; + return err; +} + + +std::ostream &FsmGbl::stats() +{ + return std::cout; +} + +/* Requested info. 
*/ +std::ostream &FsmGbl::info() +{ + return std::cout; +} + +ostream &operator<<( ostream &out, const InputLoc &loc ) +{ + assert( loc.fileName != 0 ); + switch ( errorFormat ) { + case ErrorFormatMSVC: + out << loc.fileName << "(" << loc.line; + if ( loc.col ) + out << "," << loc.col; + out << ")"; + break; + + default: + out << loc.fileName << ":" << loc.line; + if ( loc.col ) + out << ":" << loc.col; + break; + } + return out; +} + diff --git a/ragel/inputdata.cc b/ragel/inputdata.cc new file mode 100644 index 00000000..cfe51a94 --- /dev/null +++ b/ragel/inputdata.cc @@ -0,0 +1,1152 @@ +/* + * Copyright 2008-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ragel.h" +#include "common.h" +#include "inputdata.h" +#include "parsedata.h" +#include "load.h" +#include "rlscan.h" +#include "reducer.h" +#include "version.h" +#include "pcheck.h" +#include <colm/colm.h> + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <iostream> +#include <iomanip> +#include <fstream> +#include <unistd.h> +#include <sstream> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#if defined(HAVE_SYS_WAIT_H) +#include <sys/wait.h> +#endif + +#ifdef _WIN32 +#include <windows.h> +#include <psapi.h> +#include <time.h> +#include <io.h> +#include <process.h> + +#if _MSC_VER +#define S_IRUSR _S_IREAD +#define S_IWUSR _S_IWRITE +#endif +#endif + +using std::istream; +using std::ifstream; +using std::ofstream; +using std::stringstream; +using std::ostream; +using std::endl; +using std::ios; + +InputData::~InputData() +{ + inputItems.empty(); + parseDataList.empty(); + sectionList.empty(); + + for ( Vector<const char**>::Iter fns = streamFileNames; fns.lte(); fns++ ) { + const char **ptr = *fns; + while ( *ptr != 0 ) { + ::free( (void*)*ptr ); + ptr += 1; + } + free( (void*) *fns ); + } + + if ( outputFileName != 0 ) + delete[] outputFileName; + + if ( histogramFn != 0 ) + ::free( (void*)histogramFn ); + + if ( histogram != 0 ) + delete[] histogram; + + for ( ArgsVector::Iter bl = breadthLabels; bl.lte(); bl++ ) + free( (void*) *bl ); +} + +void InputData::makeDefaultFileName() +{ + if ( outputFileName == 0 ) + outputFileName = (hostLang->defaultOutFn)( inputFileName ); +} + +bool InputData::isBreadthLabel( const string &label ) +{ + for ( ArgsVector::Iter bl = breadthLabels; bl.lte(); bl++ ) { + if ( label == *bl ) + return true; + } + return false; +} + +void InputData::createOutputStream() +{ + /* Make sure we are not writing to the same file as the input file. 
*/ + if ( outputFileName != 0 ) { + if ( strcmp( inputFileName, outputFileName ) == 0 ) { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endl; + } + + /* Create the filter on the output and open it. */ + outFilter = new output_filter( outputFileName ); + + /* Open the output stream, attaching it to the filter. */ + outStream = new ostream( outFilter ); + } + else { + /* Writing out to std out. */ + outStream = &std::cout; + } +} + +void InputData::openOutput() +{ + if ( outFilter != 0 ) { + outFilter->open( outputFileName, ios::out|ios::trunc ); + if ( !outFilter->is_open() ) { + error() << "error opening " << outputFileName << " for writing" << endl; + abortCompile( 1 ); + } + } +} + +void InputData::prepareSingleMachine() +{ + ParseData *pd = 0; + GraphDictEl *gdEl = 0; + + /* Locate a machine spec to generate dot output for. We can only emit. + * Dot takes one graph at a time. */ + if ( machineSpec != 0 ) { + /* Machine specified. */ + ParseDataDictEl *pdEl = parseDataDict.find( machineSpec ); + if ( pdEl == 0 ) + error() << "could not locate machine specified with -S and/or -M" << endp; + pd = pdEl->value; + } + else { + /* No machine spec given, generate the first one. */ + if ( parseDataList.length() == 0 ) + error() << "no machine specification to generate graphviz output" << endp; + + pd = parseDataList.head; + } + + if ( machineName != 0 ) { + gdEl = pd->graphDict.find( machineName ); + if ( gdEl == 0 ) + error() << "machine definition/instantiation not found" << endp; + } + else { + /* We are using the whole machine spec. Need to make sure there + * are instances in the spec. 
*/ + if ( pd->instanceList.length() == 0 ) + error() << "no machine instantiations to generate graphviz output" << endp; + } + + pd->prepareMachineGen( gdEl, hostLang ); + dotGenPd = pd; +} + +void InputData::prepareAllMachines() +{ + for ( ParseDataDict::Iter pdel = parseDataDict; pdel.lte(); pdel++ ) { + ParseData *pd = pdel->value; + if ( pd->instanceList.length() > 0 ) { + pd->prepareMachineGen( 0, hostLang ); + + pd->makeExports(); + } + + } +} + +void InputData::generateReduced() +{ + for ( ParseDataDict::Iter pdel = parseDataDict; pdel.lte(); pdel++ ) { + ParseData *pd = pdel->value; + if ( pd->instanceList.length() > 0 ) + pd->generateReduced( inputFileName, codeStyle, *outStream, hostLang ); + } +} + +void InputData::verifyWriteHasData( InputItem *ii ) +{ + if ( ii->type == InputItem::Write ) { + if ( ii->pd->cgd == 0 ) + error( ii->loc ) << ii->pd->sectionName << ": no machine instantiations to write" << endl; + } +} + +void InputData::verifyWritesHaveData() +{ + for ( InputItemList::Iter ii = inputItems; ii.lte(); ii++ ) + verifyWriteHasData( ii ); +} + +void InputData::writeOutput( InputItem *ii ) +{ + switch ( ii->type ) { + case InputItem::Write: { + CodeGenData *cgd = ii->pd->cgd; + cgd->writeStatement( ii->loc, ii->writeArgs.size(), + ii->writeArgs, generateDot, hostLang ); + break; + } + case InputItem::HostData: { + switch ( hostLang->backend ) { + case Direct: + if ( ii->loc.fileName != 0 ) { + if ( ii->prev != 0 ) + *outStream << "\n"; + (*hostLang->genLineDirective)( *outStream, !noLineDirectives, ii->loc.line, ii->loc.fileName ); + } + + *outStream << ii->data.str(); + break; + case Translated: + openHostBlock( '@', this, *outStream, inputFileName, ii->loc.line ); + translatedHostData( *outStream, ii->data.str() ); + *outStream << "}@"; + break; + } + break; + } + case InputItem::EndSection: { + break; + } + } +} + +void InputData::writeOutput() +{ + for ( InputItemList::Iter ii = inputItems; ii.lte(); ii++ ) + writeOutput( ii ); +} + +void 
InputData::closeOutput() +{ + /* If writing to a file, delete the ostream, causing it to flush. + * Standard out is flushed automatically. */ + if ( outputFileName != 0 ) { + delete outStream; + delete outFilter; + } +} + +void InputData::processDot() +{ + /* Compiles the DOT machines. */ + prepareSingleMachine(); + + if ( errorCount > 0 ) + abortCompile( 1 ); + + createOutputStream(); + + if ( errorCount > 0 ) + abortCompile( 1 ); + + /* + * From this point on we should not be reporting any errors. + */ + + openOutput(); + writeDot( *outStream ); + closeOutput(); +} + +bool InputData::checkLastRef( InputItem *ii ) +{ + if ( generateDot ) + return true; + + if ( errorCount > 0 ) + return false; + + /* + * 1. Go forward to next last reference. + * 2. Fully process that machine, mark as processed. + * 3. Move forward through input items until no longer + */ + if ( ii->section != 0 && ii->section->lastReference == ii ) { + /* Fully Process. */ + ParseData *pd = ii->pd; + + if ( pd->instanceList.length() > 0 ) { +#ifdef WITH_RAGEL_KELBT + if ( ii->parser != 0 ) + ii->parser->terminateParser(); +#endif + + FsmRes res = pd->prepareMachineGen( 0, hostLang ); + + /* Compute exports from the export definitions. */ + pd->makeExports(); + + if ( !res.success() ) + return false; + + if ( errorCount > 0 ) + return false; + + pd->generateReduced( inputFileName, codeStyle, *outStream, hostLang ); + + if ( errorCount > 0 ) + return false; + } + + /* Mark all input items referencing the machine as processed. */ + InputItem *toMark = lastFlush; + while ( true ) { + toMark->processed = true; + + if ( toMark == ii ) + break; + + toMark = toMark->next; + } + + /* Move forward, flushing input items until we get to an unprocessed + * input item. */ + while ( lastFlush != 0 && lastFlush->processed ) { + verifyWriteHasData( lastFlush ); + + if ( errorCount > 0 ) + return false; + + /* Flush out. 
*/ + writeOutput( lastFlush ); + + lastFlush = lastFlush->next; + } + } + return true; +} + +void InputData::makeFirstInputItem() +{ + /* Make the first input item. */ + InputItem *firstInputItem = new InputItem; + firstInputItem->type = InputItem::HostData; + firstInputItem->loc.fileName = inputFileName; + firstInputItem->loc.line = 1; + firstInputItem->loc.col = 1; + inputItems.append( firstInputItem ); +} + +/* Send eof to all parsers. */ +void InputData::terminateAllParsers( ) +{ +#ifdef WITH_RAGEL_KELBT + for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ ) + pdel->value->terminateParser(); +#endif +} + +void InputData::flushRemaining() +{ + InputItem *item = inputItems.head; + + while ( item != 0 ) { + checkLastRef( item ); + item = item->next; + } + + /* Flush remaining items. */ + while ( lastFlush != 0 ) { + /* Flush out. */ + writeOutput( lastFlush ); + + lastFlush = lastFlush->next; + } +} + +void InputData::makeTranslateOutputFileName() +{ + origOutputFileName = outputFileName; + outputFileName = fileNameFromStem( outputFileName, ".ri" ); + genOutputFileName = outputFileName; +} + +#ifdef WITH_RAGEL_KELBT +void InputData::parseKelbt() +{ + /* + * Ragel Parser from ragel 6. + */ + ifstream *inFileStream; + istream *inStream; + + /* Open the input file for reading. */ + assert( inputFileName != 0 ); + inFileStream = new ifstream( inputFileName ); + if ( ! inFileStream->is_open() ) + error() << "could not open " << inputFileName << " for reading" << endp; + inStream = inFileStream; + + makeFirstInputItem(); + + Scanner scanner( this, inputFileName, *inStream, 0, 0, 0, false ); + + scanner.sectionPass = true; + scanner.do_scan(); + + inStream->clear(); + inStream->seekg( 0, std::ios::beg ); + curItem = inputItems.head; + lastFlush = inputItems.head; + + scanner.sectionPass = false; + scanner.do_scan(); + + /* Finished, final check for errors.. */ + if ( errorCount > 0 ) + abortCompile( 1 ); + + /* Bail on above error. 
*/ + if ( errorCount > 0 ) + abortCompile( 1 ); + + delete inFileStream; +} + +void InputData::processKelbt() +{ + /* With the kelbt version we implement two parse passes. The first is used + * to identify the last time that any given machine is referenced by a + * ragel section. In the second pass we parse, compile, and emit as far + * forward as possible when we encounter the last reference to a machine. + * */ + + if ( generateDot ) { + parseKelbt(); + terminateAllParsers(); + processDot(); + } + else { + createOutputStream(); + openOutput(); + parseKelbt(); + flushRemaining(); + closeOutput(); + } + + assert( errorCount == 0 ); +} +#endif + +bool InputData::parseReduce() +{ + /* + * Colm-based reduction parser introduced in ragel 7. + */ + + TopLevel *topLevel = new TopLevel( frontendSections, this, hostLang, + minimizeLevel, minimizeOpt ); + + /* Check input file. File is actually opened by colm code. We don't + * need to perform the check if in libragel since it comes in via a + * string. */ + if ( input == 0 ) { + ifstream *inFile = new ifstream( inputFileName ); + if ( ! 
inFile->is_open() ) + error() << "could not open " << inputFileName << " for reading" << endp; + delete inFile; + } + + if ( errorCount ) + return false; + + makeFirstInputItem(); + + curItem = inputItems.head; + lastFlush = inputItems.head; + + + topLevel->reduceFile( "rlparse", inputFileName ); + + if ( errorCount ) + return false; + + bool success = topLevel->success; + + delete topLevel; + return success; +} + +bool InputData::processReduce() +{ + if ( generateDot ) { + parseReduce(); + processDot(); + return true; + } + else { + createOutputStream(); + openOutput(); + + bool success = parseReduce(); + if ( success ) + flushRemaining(); + + closeOutput(); + + if ( !success && outputFileName != 0 ) + unlink( outputFileName ); + + return success; + } +} + +bool InputData::process() +{ + switch ( frontend ) { + case KelbtBased: { +#ifdef WITH_RAGEL_KELBT + processKelbt(); +#endif + return true; + } + case ReduceBased: { + return processReduce(); + } + } + return false; +} + +/* Print a summary of the options. 
*/ +void InputData::usage() +{ + info() << +"usage: ragel [options] file\n" +"general:\n" +" -h, -H, -?, --help Print this usage and exit\n" +" -v, --version Print version information and exit\n" +" -o <file> Write output to <file>\n" +" -s Print some statistics and compilation info to stderr\n" +" -d Do not remove duplicates from action lists\n" +" -I <dir> Add <dir> to the list of directories to search\n" +" for included an imported files\n" +" --rlhc Show the rlhc command used to compile\n" +" --save-temps Do not delete intermediate file during compilation\n" +" --no-intermediate Disable call to rlhc, leave behind intermediate\n" +"error reporting format:\n" +" --error-format=gnu file:line:column: message (default)\n" +" --error-format=msvc file(line,column): message\n" +"fsm minimization:\n" +" -n Do not perform minimization\n" +" -m Minimize at the end of the compilation\n" +" -l Minimize after most operations (default)\n" +" -e Minimize after every operation\n" +"visualization:\n" +" -V Generate a dot file for Graphviz\n" +" -p Display printable characters on labels\n" +" -S <spec> FSM specification to output (for graphviz output)\n" +" -M <machine> Machine definition/instantiation to output (for\n" +" graphviz output)\n" +"host language:\n" +" -C C, C++, Obj-C or Obj-C++ (default)\n" +" All code styles supported.\n" +" --asm --gas-x86-64-sys-v\n" +" GNU AS, x86_64, System V ABI.\n" +" Generated in a code style equivalent to -G2\n" +" -D D All code styles supported\n" +" -Z Go All code styles supported\n" +" -A C# -T0 -T1 -F0 -F1 -G0 -G1\n" +" -J Java -T0 -T1 -F0 -F1\n" +" -R Ruby -T0 -T1 -F0 -F1\n" +" -O OCaml -T0 -T1 -F0 -F1\n" +" -U Rust -T0 -T1 -F0 -F1\n" +" -Y Julia -T0 -T1 -F0 -F1\n" +" -K Crack -T0 -T1 -F0 -F1\n" +" -P JavaScript -T0 -T1 -F0 -F1\n" +"line directives:\n" +" -L Inhibit writing of #line directives\n" +"code style:\n" +" -T0 Binary search (default)\n" +" -T1 Binary search with expanded actions \n" +" -F0 Flat table\n" +" -F1 Flat table 
with expanded actions\n" +" -G0 Switch-driven\n" +" -G1 Switch-driven with expanded actions\n" +" -G2 Goto-driven with expanded actions\n" +"large machines:\n" +" --integral-tables Use integers for table data (default)\n" +" --string-tables Encode table data into strings for faster host lang\n" +" compilation\n" +"analysis:\n" +" --prior-interaction Search for condition-based general repetitions\n" +" that will not function properly due to state mod\n" +" overlap and must be NFA reps. \n" +" --conds-depth=D Search for high-cost conditions inside a prefix\n" +" of the machine (depth D from start state).\n" +" --state-limit=L Report fail if number of states exceeds this\n" +" during compilation.\n" +" --breadth-check=E1,E2,.. Report breadth cost of named entry points and\n" +" the start state.\n" +" --input-histogram=FN Input char histogram for breadth check. If\n" +" unspecified a flat histogram is used.\n" +"testing:\n" +" --kelbt-frontend Compile using original ragel + kelbt frontend\n" +" Requires ragel be built with ragel + kelbt support\n" +" --colm-frontend Compile using a colm-based recursive descent\n" +" frontend\n" +" --reduce-frontend Compile using a colm-based reducer (default)\n" +" --var-backend Use the variable-based backend even if the host lang\n" +" supports goto-based\n" +" --supported-host-langs Show supported host languages by command line arg\n" +" --supported-frontends Show supported frontends\n" +" --supported-backends Show supported backends\n" +" --force-libragel Cause mainline to behave like libragel\n" + ; + + abortCompile( 0 ); +} + +/* Print version information and exit. 
*/ +void InputData::version() +{ + info() << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl << + "Copyright (c) 2001-2019 by Adrian Thurston" << endl; + abortCompile( 0 ); +} + +/* Print the frontend option names (kelbt only when built with + * WITH_RAGEL_KELBT), then call abortCompile( 0 ). */ +void InputData::showFrontends() +{ + ostream &out = info(); + out << "--colm-frontend"; + out << " --reduce-frontend"; +#ifdef WITH_RAGEL_KELBT + out << " --kelbt-frontend"; +#endif + out << endl; + abortCompile( 0 ); +} + +/* Print the backend option names, then call abortCompile( 0 ). */ +void InputData::showBackends() +{ + info() << + "--direct-backend --colm-backend" << endl; + abortCompile( 0 ); +} + +/* Build an InputLoc from a file name, line and column. */ +InputLoc makeInputLoc( const char *fileName, int line, int col ) +{ + InputLoc loc( fileName, line, col ); + return loc; +} + +/* Write path to out, doubling every backslash. */ +void escapeLineDirectivePath( std::ostream &out, char *path ) +{ + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } +} + +/* Process the command line, storing the selected options in this object. + * Problems are reported through error(). */ +void InputData::parseArgs( int argc, const char **argv ) +{ + ParamCheck pc( "o:dnmleabjkS:M:I:vHh?-:sT:F:W:G:LpV", argc, argv ); + + /* Decide if we were invoked using a path variable, or with an explicit path. */ + const char *lastSlash = strrchr( argv[0], '/' ); + if ( lastSlash == 0 ) { + /* Default to the binary install location. */ + dirName = BINDIR; + } + else { + /* Compute dirName from argv0. */ + dirName = string( argv[0], lastSlash - argv[0] ); + } + + /* FIXME: Need to check code styles VS language. */ + + while ( pc.check() ) { + switch ( pc.state ) { + case ParamCheck::match: + switch ( pc.parameter ) { + case 'V': + generateDot = true; + break; + + /* Output. */ + case 'o': + if ( *pc.paramArg == 0 ) + error() << "a zero length output file name was given" << endl; + else if ( outputFileName != 0 ) + error() << "more than one output file name was given" << endl; + else { + /* Ok, remember the output file name. */ + outputFileName = new char[strlen(pc.paramArg)+1]; + strcpy( (char*)outputFileName, pc.paramArg ); + } + break; + + /* Flag for turning off duplicate action removal. 
*/ + case 'd': + wantDupsRemoved = false; + break; + + /* Minimization, mostly hidden options. */ + case 'n': + minimizeOpt = MinimizeNone; + break; + case 'm': + minimizeOpt = MinimizeEnd; + break; + case 'l': + minimizeOpt = MinimizeMostOps; + break; + case 'e': + minimizeOpt = MinimizeEveryOp; + break; + case 'a': + #ifdef TO_UPGRADE_CONDS + minimizeLevel = MinimizeApprox; + #else + error() << "minimize approx (-a) unsupported in this version" << endp; + #endif + break; + case 'b': + #ifdef TO_UPGRADE_CONDS + minimizeLevel = MinimizeStable; + #else + error() << "minimize stable (-b) unsupported in this version" << endp; + #endif + break; + case 'j': + minimizeLevel = MinimizePartition1; + break; + case 'k': + minimizeLevel = MinimizePartition2; + break; + + /* Machine spec. */ + case 'S': + if ( *pc.paramArg == 0 ) + error() << "please specify an argument to -S" << endl; + else if ( machineSpec != 0 ) + error() << "more than one -S argument was given" << endl; + else { + /* Ok, remember the path to the machine to generate. */ + machineSpec = pc.paramArg; + } + break; + + /* Machine path. */ + case 'M': + if ( *pc.paramArg == 0 ) + error() << "please specify an argument to -M" << endl; + else if ( machineName != 0 ) + error() << "more than one -M argument was given" << endl; + else { + /* Ok, remember the machine name to generate. */ + machineName = pc.paramArg; + } + break; + + case 'I': + if ( *pc.paramArg == 0 ) + error() << "please specify an argument to -I" << endl; + else { + includePaths.append( pc.paramArg ); + } + break; + + /* Version and help. 
*/ + case 'v': + version(); + break; + case 'H': case 'h': case '?': + usage(); + break; + case 's': + printStatistics = true; + break; + case '-': { + char *arg = strdup( pc.paramArg ); + char *eq = strchr( arg, '=' ); + + if ( eq != 0 ) + *eq++ = 0; + + if ( strcmp( arg, "help" ) == 0 ) + usage(); + else if ( strcmp( arg, "version" ) == 0 ) + version(); + else if ( strcmp( arg, "error-format" ) == 0 ) { + if ( eq == 0 ) + error() << "expecting '=value' for error-format" << endl; + else if ( strcmp( eq, "gnu" ) == 0 ) + errorFormat = ErrorFormatGNU; + else if ( strcmp( eq, "msvc" ) == 0 ) + errorFormat = ErrorFormatMSVC; + else + error() << "invalid value for error-format" << endl; + } + else if ( strcmp( arg, "rlhc" ) == 0 ) + rlhc = true; + else if ( strcmp( arg, "no-intermediate" ) == 0 ) + noIntermediate = true; +#ifdef WITH_RAGEL_KELBT + else if ( strcmp( arg, "kelbt-frontend" ) == 0 ) { + frontend = KelbtBased; + frontendSpecified = true; + } +#else + else if ( strcmp( arg, "kelbt-frontend" ) == 0 ) { + error() << "--kelbt-frontend specified but, " + "ragel not built with ragel+kelbt support" << endp; + } +#endif + else if ( strcmp( arg, "reduce-frontend" ) == 0 ) { + frontend = ReduceBased; + frontendSpecified = true; + } + else if ( strcmp( arg, "string-tables" ) == 0 ) + stringTables = true; + else if ( strcmp( arg, "integral-tables" ) == 0 ) + stringTables = false; + else if ( strcmp( arg, "supported-frontends" ) == 0 ) + showFrontends(); + else if ( strcmp( arg, "supported-backends" ) == 0 ) + showBackends(); + else if ( strcmp( arg, "save-temps" ) == 0 ) + saveTemps = true; + + else if ( strcmp( arg, "prior-interaction" ) == 0 ) + checkPriorInteraction = true; + else if ( strcmp( arg, "conds-depth" ) == 0 ) + condsCheckDepth = strtol( eq, 0, 10 ); + else if ( strcmp( arg, "state-limit" ) == 0 ) + stateLimit = strtol( eq, 0, 10 ); + + else if ( strcmp( arg, "breadth-check" ) == 0 ) { + char *ptr = 0; + while ( true ) { + char *label = strtok_r( eq, ",", 
&ptr ); + eq = NULL; + if ( label == NULL ) + break; + breadthLabels.append( strdup( label ) ); + } + checkBreadth = true; + } + else if ( strcmp( arg, "input-histogram" ) == 0 ) + histogramFn = strdup(eq); + else if ( strcmp( arg, "var-backend" ) == 0 ) + forceVar = true; + else if ( strcmp( arg, "no-fork" ) == 0 ) + noFork = true; + else { + error() << "--" << pc.paramArg << + " is an invalid argument" << endl; + } + free( arg ); + break; + } + + /* Passthrough args. */ + case 'T': + if ( pc.paramArg[0] == '0' ) + codeStyle = GenBinaryLoop; + else if ( pc.paramArg[0] == '1' ) + codeStyle = GenBinaryExp; + else { + error() << "-T" << pc.paramArg[0] << + " is an invalid argument" << endl; + abortCompile( 1 ); + } + break; + case 'F': + if ( pc.paramArg[0] == '0' ) + codeStyle = GenFlatLoop; + else if ( pc.paramArg[0] == '1' ) + codeStyle = GenFlatExp; + else { + error() << "-F" << pc.paramArg[0] << + " is an invalid argument" << endl; + abortCompile( 1 ); + } + break; + case 'G': + if ( pc.paramArg[0] == '0' ) + codeStyle = GenGotoLoop; + else if ( pc.paramArg[0] == '1' ) + codeStyle = GenGotoExp; + else if ( pc.paramArg[0] == '2' ) + codeStyle = GenIpGoto; + else if ( pc.paramArg[0] == 'T' && pc.paramArg[1] == '2' ) { + codeStyle = GenIpGoto; + maxTransitions = 32; + } else { + error() << "-G" << pc.paramArg[0] << + " is an invalid argument" << endl; + abortCompile( 1 ); + } + break; + case 'W': + if ( pc.paramArg[0] == '0' ) + codeStyle = GenSwitchLoop; + else if ( pc.paramArg[0] == '1' ) + codeStyle = GenSwitchExp; + else { + error() << "-G" << pc.paramArg[0] << + " is an invalid argument" << endl; + abortCompile( 1 ); + } + break; + + case 'p': + displayPrintables = true; + break; + + case 'L': + noLineDirectives = true; + break; + } + break; + + case ParamCheck::invalid: + error() << "-" << pc.parameter << " is an invalid argument" << endl; + break; + + case ParamCheck::noparam: + /* It is interpreted as an input file. 
*/ + if ( *pc.curArg == 0 ) + error() << "a zero length input file name was given" << endl; + else if ( inputFileName != 0 ) + error() << "more than one input file name was given" << endl; + else { + /* OK, Remember the filename. */ + inputFileName = pc.curArg; + } + break; + } + } +} + +void InputData::loadHistogram() +{ + const int alphsize = 256; + + /* Init a default. */ + histogram = new double[alphsize]; + ifstream h( histogramFn ); + if ( !h.is_open() ) + error() << "histogram read: failed to open file: " << histogramFn << endp; + + int i = 0; + double value; + while ( true ) { + if ( h >> value ) { + if ( i >= alphsize ) { + /* Too many items. */ + error() << "histogram read: too many histogram values," + " expecting " << alphsize << " (for char alphabet)" << endp; + } + histogram[i] = value; + i++; + } + else { + /* Read failure. */ + if ( h.eof() ) { + if ( i < alphsize ) { + error() << "histogram read: fell short of " << + alphsize << " items" << endp; + } + break; + } + else { + error() << "histogram read: error at item " << i << endp; + } + } + } +} + +void InputData::defaultHistogram() +{ + /* Flat histogram. */ + const int alphsize = 256; + histogram = new double[alphsize]; + for ( int i = 0; i < alphsize; i++ ) { + histogram[i] = 1.0 / (double)alphsize; + } +} + +void InputData::checkArgs() +{ + /* Require an input file. If we use standard in then we won't have a file + * name on which to base the output. */ + if ( inputFileName == 0 ) + error() << "no input file given" << endl; + + /* Bail on argument processing errors. */ + if ( errorCount > 0 ) + abortCompile( 1 ); + + /* Make sure we are not writing to the same file as the input file. 
*/ + if ( inputFileName != 0 && outputFileName != 0 && + strcmp( inputFileName, outputFileName ) == 0 ) + { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endp; + } + + if ( !frontendSpecified ) + frontend = ReduceBased; + + if ( checkBreadth ) { + if ( histogramFn != 0 ) + loadHistogram(); + else + defaultHistogram(); + } +} + +char *InputData::readInput( const char *inputFileName ) +{ + struct stat st; + int res = stat( inputFileName, &st ); + if ( res != 0 ) { + error() << inputFileName << ": stat failed: " << strerror(errno) << endl; + return 0; + } + + std::ifstream in( inputFileName ); + if ( !in.is_open() ) { + error() << inputFileName << ": could not open in force-libragel mode"; + return 0; + } + + char *input = new char[st.st_size+1]; + in.read( input, st.st_size ); + if ( in.gcount() != st.st_size ) { + error() << inputFileName << ": could not read in force-libragel mode"; + delete[] input; + return 0; + } + input[st.st_size] = 0; + + return input; +} + +int InputData::main( int argc, const char **argv ) +{ + int code = 0; + try { + parseArgs( argc, argv ); + checkArgs(); + if ( !generateDot ) + makeDefaultFileName(); + + if ( !process() ) + abortCompile( 1 ); + } + catch ( const AbortCompile &ac ) { + code = ac.code; + } + + return code; +} + +int InputData::runFrontend( int argc, const char **argv ) +{ + if ( !process() ) + return -1; + return 0; +} + +int InputData::runRlhc( int argc, const char **argv ) +{ + struct colm_program *prg; + int exit_status; + + prg = colm_new_program( rlhcSections ); + colm_set_debug( prg, 0 ); + colm_run_program( prg, argc, argv ); + exit_status = colm_delete_program( prg ); + return exit_status; +} + +/* Run a job (frontend or backend). If we want forks then we return the result + * via the process's exit code. otherwise it comes back on the stack. 
*/ +int InputData::runJob( const char *what, IdProcess idProcess, int argc, const char **argv ) +{ +#if defined(HAVE_SYS_WAIT_H) + if ( !noFork ) { + pid_t pid = fork(); + + if ( pid == 0 ) { + int es = (this->*idProcess)( argc, argv ); + exit( es ); + } + + int status = 0; + waitpid( pid, &status, 0 ); + if ( WIFSIGNALED(status) ) { + error() << what << " stopped by signal: " << WTERMSIG(status) << std::endl; + return -1; + } + + return WEXITSTATUS( status ); + } +#endif + return (this->*idProcess)( argc, argv ); +} + +int InputData::rlhcMain( int argc, const char **argv ) +{ + parseArgs( argc, argv ); + checkArgs(); + makeDefaultFileName(); + makeTranslateOutputFileName(); + + int es = runJob( "frontend", &InputData::runFrontend, 0, 0 ); + + if ( es != 0 ) + return es; + + /* rlhc <input> <output> */ + const char *_argv[] = { "rlhc", + genOutputFileName.c_str(), + origOutputFileName.c_str(), 0 }; + + return runJob( "rlhc", &InputData::runRlhc, 3, _argv ); +} diff --git a/ragel/inputdata.h b/ragel/inputdata.h new file mode 100644 index 00000000..36028778 --- /dev/null +++ b/ragel/inputdata.h @@ -0,0 +1,361 @@ +/* + * Copyright 2008-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _INPUT_DATA +#define _INPUT_DATA + +#include "gendata.h" +#include <iostream> +#include <sstream> +#include <vector> + +struct ParseData; +struct Parser6; +struct CondSpace; +struct CondAp; +struct ActionTable; +struct Section; +struct LangFuncs; + +void translatedHostData( ostream &out, const string &data ); + +struct InputItem +{ + InputItem() + : + section(0), + pd(0), + parser(0), + processed(false) + {} + + enum Type { + HostData, + EndSection, + Write, + }; + + Type type; + std::ostringstream data; + std::string name; + Section *section; + ParseData *pd; + Parser6 *parser; + std::vector<std::string> writeArgs; + + InputLoc loc; + bool processed; + + InputItem *prev, *next; +}; + +struct IncItem +{ + IncItem() + : + section(0) + {} + + Section *section; + InputLoc loc; + long start, end; + size_t length; + IncItem *prev, *next; +}; + + +typedef AvlMap<std::string, ParseData*, CmpString> ParseDataDict; +typedef AvlMapEl<std::string, ParseData*> ParseDataDictEl; +typedef DList<ParseData> ParseDataList; + +/* This exists for ragel-6 parsing. 
*/ +typedef AvlMap<const char*, Parser6*, CmpStr> ParserDict; +typedef AvlMapEl<const char*, Parser6*> ParserDictEl; +typedef DList<Parser6> ParserList; + +typedef DList<InputItem> InputItemList; +typedef DList<IncItem> IncItemList; +typedef Vector<const char *> ArgsVector; + +struct Section +{ + Section( std::string sectionName ) + : + sectionName(sectionName), + lastReference(0) + {} + + std::string sectionName; + + /* Pointer to the last input item to reference this parse data struct. Once + * we pass over this item we are free to clear away the parse tree. */ + InputItem *lastReference; + + Section *prev, *next; +}; + +typedef AvlMap<std::string, Section*, CmpString> SectionDict; +typedef AvlMapEl<std::string, Section*> SectionDictEl; +typedef DList<Section> SectionList; + +struct FnMachine +{ + FnMachine( const string &fileName, const string &machine ) + : fileName( fileName ), machine( machine ) {} + + string fileName; + string machine; +}; + +struct CmpFnMachine +{ + static inline int compare( const FnMachine &k1, const FnMachine &k2 ) + { + int r = strcmp( k1.fileName.c_str(), k2.fileName.c_str() ); + if ( r != 0 ) + return r; + else { + r = strcmp( k1.machine.c_str(), k2.machine.c_str() ); + if ( r != 0 ) + return r; + } + return 0; + } +}; + +struct IncludeRec + : public AvlTreeEl<IncludeRec> +{ + IncludeRec( const string &fileName, const string &machine ) + : key( fileName, machine ), data(0) {} + + ~IncludeRec() + { + if ( data != 0 ) + delete[] data; + } + + FnMachine key; + + const FnMachine &getKey() + { return key; } + + std::string foundFileName; + + char *data; + int len; + +}; + +struct InputData +: + public FsmGbl +{ + InputData( const HostLang *hostLang, + struct colm_sections *frontendSections, struct colm_sections *rlhcSections ) + : + FsmGbl(hostLang), + frontendSections(frontendSections), + rlhcSections(rlhcSections), + inputFileName(0), + outputFileName(0), + nextMachineId(0), + inStream(0), + outStream(0), + outFilter(0), + curItem(0), + 
lastFlush(0), + codeStyle(GenBinaryLoop), + dotGenPd(0), + machineSpec(0), + machineName(0), + generateDot(false), + noLineDirectives(false), + maxTransitions(LONG_MAX), + numSplitPartitions(0), + rlhc(false), + rlhcShowCmd(false), + noIntermediate(false), + frontendSpecified(false), + backendSpecified(false), + featureSpecified(false), + saveTemps(false), + condsCheckDepth(-1), + transSpanDepth(6), + stateLimit(0), + checkBreadth(0), + varBackend(false), + histogramFn(0), + histogram(0), + input(0), + forceVar(false), + noFork(false) + {} + + ~InputData(); + + void usage(); + void version(); + void showFrontends(); + void showBackends(); + + struct colm_sections *frontendSections; + struct colm_sections *rlhcSections; + std::string dirName; + + /* The name of the root section, this does not change during an include. */ + const char *inputFileName; + const char *outputFileName; + + string comm; + + int nextMachineId; + + std::string origOutputFileName; + std::string genOutputFileName; + + /* Io globals. */ + std::istream *inStream; + std::ostream *outStream; + output_filter *outFilter; + + ParseDataDict parseDataDict; + ParseDataList parseDataList; + InputItemList inputItems; + InputItem *curItem; + InputItem *lastFlush; + + /* Ragel-6 frontend. */ + ParserDict parserDict; + ParserList parserList; + + SectionDict sectionDict; + SectionList sectionList; + + ArgsVector includePaths; + + bool isBreadthLabel( const string &label ); + ArgsVector breadthLabels; + + /* Target language and output style. 
*/ + CodeStyle codeStyle; + + ParseData *dotGenPd; + + const char *machineSpec; + const char *machineName; + + bool generateDot; + + bool noLineDirectives; + + long maxTransitions; + int numSplitPartitions; + + bool rlhc; + bool rlhcShowCmd; + bool noIntermediate; + + bool frontendSpecified; + RagelFrontend frontend; + + bool backendSpecified; + + bool featureSpecified; + + bool saveTemps; + long condsCheckDepth; + long transSpanDepth; + long stateLimit; + bool checkBreadth; + + bool varBackend; + + const char *histogramFn; + double *histogram; + + const char *input; + + Vector<const char**> streamFileNames; + + bool forceVar; + bool noFork; + + void verifyWriteHasData( InputItem *ii ); + void verifyWritesHaveData(); + + void makeTranslateOutputFileName(); + void flushRemaining(); + void makeFirstInputItem(); + void writeOutput(); + void makeDefaultFileName(); + void createOutputStream(); + void openOutput(); + void closeOutput(); + void generateReduced(); + void prepareSingleMachine(); + void prepareAllMachines(); + + void writeOutput( InputItem *ii ); + void writeLanguage( std::ostream &out ); + + bool checkLastRef( InputItem *ii ); + + void parseKelbt(); + void processDot(); + void processCodeEarly(); + + void writeDot( std::ostream &out ); + + void loadHistogram(); + void defaultHistogram(); + + void parseArgs( int argc, const char **argv ); + void checkArgs(); + void terminateParser( Parser6 *parser ); + void terminateAllParsers(); + + void processKelbt(); + void processColm(); + bool processReduce(); + bool process(); + bool parseReduce(); + + char *readInput( const char *inputFileName ); + + const char **makeIncludePathChecks( const char *curFileName, const char *fileName ); + std::ifstream *tryOpenInclude( const char **pathChecks, long &found ); + int main( int argc, const char **argv ); + + int runFrontend( int argc, const char **argv ); + int runRlhc( int argc, const char **argv ); + + typedef int (InputData::*IdProcess)( int argc, const char **argv ); + 
+ int runJob( const char *what, IdProcess idProcess, + int argc, const char **argv ); + + int rlhcMain( int argc, const char **argv ); +}; + + +#endif diff --git a/ragel/ipgoto.cc b/ragel/ipgoto.cc new file mode 100644 index 00000000..c273b012 --- /dev/null +++ b/ragel/ipgoto.cc @@ -0,0 +1,765 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "ipgoto.h" +#include "redfsm.h" +#include "gendata.h" +#include "bstmap.h" +#include "parsedata.h" +#include "inputdata.h" + +#include <sstream> + +using std::ostringstream; + +void IpGoto::tableDataPass() +{ + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); +} + +void IpGoto::genAnalysis() +{ + /* For directly executable machines there is no required state + * ordering. Choose a depth-first ordering to increase the + * potential for fall-throughs. 
*/ + redFsm->depthFirstOrdering(); + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Choose single. */ + redFsm->moveSelectTransToSingle(); + + /* If any errors have occured in the input file then don't write anything. */ + if ( red->id->errorCount > 0 ) + return; + + redFsm->setInTrans(); + + /* Anlayze Machine will find the final action reference counts, among other + * things. We will use these in reporting the usage of fsm directives in + * action code. */ + red->analyzeMachine(); + + /* Run the analysis pass over the table data. */ + setTableState( TableArray::AnalyzePass ); + tableDataPass(); + + /* Switch the tables over to the code gen mode. */ + setTableState( TableArray::GeneratePass ); +} + +bool IpGoto::useAgainLabel() +{ + return redFsm->anyActionRets() || + redFsm->anyActionByValControl() || + redFsm->anyRegNextStmt(); +} + +void IpGoto::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + ret << "goto " << stLabel[gotoDest].reference() << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void IpGoto::GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";"; + + ret << " goto " << _again << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void IpGoto::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << targState << + "; " << TOP() << "+= 1; "; + + ret << "goto " << stLabel[callDest].reference() << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void IpGoto::NCALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if 
( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << targState << + "; " << TOP() << "+= 1; " << vCS() << " = " << callDest << "; " << + CLOSE_GEN_BLOCK(); +} + +void IpGoto::CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << targState << "; " << TOP() << "+= 1;" << + vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";"; + + ret << " goto " << _again << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void IpGoto::NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << targState << "; " << TOP() << "+= 1;" << + vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << "; " << CLOSE_GEN_BLOCK(); +} + +void IpGoto::RET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << " -= 1;" << vCS() << " = " + << STACK() << "[" << TOP() << "];"; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << "goto " << _again << ";" << CLOSE_GEN_BLOCK(); +} + +void IpGoto::NRET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << " -= 1;" << vCS() 
<< " = " + << STACK() << "[" << TOP() << "];"; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << CLOSE_GEN_BLOCK(); +} + +void IpGoto::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << vCS() << " = " << nextDest << ";"; +} + +void IpGoto::NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << vCS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << ");"; +} + +void IpGoto::CURS( ostream &ret, bool inFinish ) +{ + ret << "(" << ps << ")"; +} + +void IpGoto::TARGS( ostream &ret, bool inFinish, int targState ) +{ + ret << targState; +} + +void IpGoto::BREAK( ostream &ret, int targState, bool csForced ) +{ + ret << "{" << P() << "+= 1; "; + if ( !csForced ) + ret << vCS() << " = " << targState << "; "; + ret << "goto " << _out << ";}"; +} + +void IpGoto::NBREAK( ostream &ret, int targState, bool csForced ) +{ + ret << "{" << P() << "+= 1; "; + if ( !csForced ) + ret << vCS() << " = " << targState << "; "; + ret << nbreak << " = 1;}"; +} + +void IpGoto::NFA_PUSH_ACTION( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->push != 0 ) + act = targ->push->actListId+1; + nfaPushActions.value( act ); +} + +void IpGoto::NFA_POP_TEST( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->popTest != 0 ) + act = targ->popTest->actListId+1; + nfaPopTrans.value( act ); +} + + +bool IpGoto::IN_TRANS_ACTIONS( RedStateAp *state ) +{ + bool anyWritten = false; + + /* Emit any transitions that have actions and that go to this state. */ + for ( int it = 0; it < state->numInConds; it++ ) { + RedCondPair *trans = state->inConds[it]; + if ( trans->action != 0 ) { + /* Remember that we wrote an action so we know to write the + * line directive for going back to the output. */ + anyWritten = true; + + /* Write the label for the transition so it can be jumped to. 
*/ + if ( ctrLabel[trans->id].isReferenced ) + out << "_ctr" << trans->id << ":\n"; + + /* If the action contains a next, then we must preload the current + * state since the action may or may not set it. */ + if ( trans->action->anyNextStmt() ) + out << " " << vCS() << " = " << trans->targ->id << ";\n"; + + if ( redFsm->anyRegNbreak() ) + out << nbreak << " = 0;\n"; + + /* Write each action in the list. */ + for ( GenActionTable::Iter item = trans->action->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( trans->targ->id, false, + trans->action->anyNextStmt() ) ); + out << "\n"; + } + + if ( redFsm->anyRegNbreak() ) { + out << + "if ( " << nbreak << " == 1 )\n" + " goto " << _out << ";\n"; + } + + + /* If the action contains a next then we need to reload, otherwise + * jump directly to the target state. */ + if ( trans->action->anyNextStmt() ) + out << "goto " << _again << ";\n"; + else + out << "goto " << stLabel[trans->targ->id].reference() << ";\n"; + } + } + + + return anyWritten; +} + +void IpGoto::GOTO_HEADER( RedStateAp *state ) +{ +} + +void IpGoto::STATE_GOTO_ERROR() +{ +} + + +/* Emit the goto to take for a given transition. */ +std::ostream &IpGoto::TRANS_GOTO( RedTransAp *trans ) +{ + if ( trans->condSpace == 0 || trans->condSpace->condSet.length() == 0 ) { + /* Existing. */ + assert( trans->numConds() == 1 ); + RedCondPair *cond = trans->outCond( 0 ); + if ( cond->action != 0 ) { + /* Go to the transition which will go to the state. */ + out << "goto " << ctrLabel[trans->p.id].reference() << ";"; + } + else { + /* Go directly to the target state. 
*/ + out << "goto " << stLabel[cond->targ->id].reference() << ";"; + } + } + else { + out << ck << " = 0;\n"; + for ( GenCondSet::Iter csi = trans->condSpace->condSet; csi.lte(); csi++ ) { + out << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = (1 << csi.pos()); + out << " )\n" << ck << " += " << condValOffset << ";\n"; + } + CondKey lower = 0; + CondKey upper = trans->condFullSize() - 1; + COND_B_SEARCH( trans, lower, upper, 0, trans->numConds() - 1 ); + + if ( trans->errCond() != 0 ) { + COND_GOTO( trans->errCond() ) << "\n"; + } + } + + return out; +} + +/* Emit the goto to take for a given transition. */ +std::ostream &IpGoto::COND_GOTO( RedCondPair *cond ) +{ + /* Existing. */ + if ( cond->action != 0 ) { + /* Go to the transition which will go to the state. */ + out << "goto " << ctrLabel[cond->id].reference() << ";"; + } + else { + /* Go directly to the target state. */ + out << "goto " << stLabel[cond->targ->id].reference() << ";"; + } + + return out; +} + +std::ostream &IpGoto::EXIT_STATES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( outLabel[st->id].isReferenced ) { + out << outLabel[st->id].define() << ": " << vCS() << " = " << + st->id << "; goto " << _out << "; \n"; + } + if ( popLabel[st->id].isReferenced ) { + out << popLabel[st->id].define() << ": " << vCS() << " = " << + st->id << "; goto " << _pop << "; \n"; + } + } + return out; +} + +std::ostream &IpGoto::AGAIN_CASES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + out << + "case " << st->id << ": goto " << stLabel[st->id].reference() << ";\n"; + } + return out; +} + +std::ostream &IpGoto::STATE_GOTO_CASES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + out << "case " << st->id << ":\n"; + out << "goto st_case_" << st->id << ";\n"; + } + return out; +} + +void IpGoto::NFA_PUSH( RedStateAp *state ) +{ + std::stringstream ss; + ss << state->id; + std::string _state = ss.str(); + + if ( 
redFsm->anyNfaStates() ) { + + if ( state->nfaTargs != 0 ) { + out << + "if ( " << ARR_REF( nfaOffsets ) << "[" << _state << "] != 0 ) {\n"; + + if ( red->nfaPrePushExpr != 0 ) { + out << + new_recs << " = " << state->nfaTargs->length() << ";\n"; + } + + if ( red->nfaPrePushExpr != 0 ) { + out << OPEN_HOST_BLOCK( red->nfaPrePushExpr ); + INLINE_LIST( out, red->nfaPrePushExpr->inlineList, 0, false, false ); + out << CLOSE_HOST_BLOCK(); + } + + int alt = 0; + for ( RedNfaTargs::Iter nt = *state->nfaTargs; nt.lte(); nt++ ) { + out << + " nfa_bp[nfa_len].state = " << nt->state->id << ";\n" + " nfa_bp[nfa_len].p = " << P() << ";\n"; + + if ( nt->popTest != 0 ) { + out << + " nfa_bp[nfa_len].popTrans = " << (nt->popTest->actListId+1) << ";\n"; + } + else if ( redFsm->bAnyNfaPops ) { + out << + " nfa_bp[nfa_len].popTrans = 0;\n"; + } + + if ( nt->push != 0 ) { + for ( GenActionTable::Iter item = nt->push->key; item.lte(); item++ ) + ACTION( out, item->value, IlOpts( 0, false, false ) ); + } + + out << + " nfa_len += 1;\n"; + + alt += 1; + } + + out << + "}\n"; + } + } +} + +std::ostream &IpGoto::STATE_GOTOS() +{ + bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + IN_TRANS_ACTIONS( st ); + + if ( stLabel[st->id].isReferenced ) + out << "_st" << st->id << ":\n"; + + /* Need to do this if the transition is an eof transition, or if + * the action contains fexec. Otherwise, no need. */ + if ( eof ) { + out << + "if ( " << P() << " == " << vEOF() << " )\n"; + + if ( st->isFinal || !redFsm->anyNfaStates() ) + out << "goto " << outLabel[st->id].reference() << ";\n"; + else + out << "goto " << popLabel[st->id].reference() << ";\n"; + } + + if ( st->toStateAction != 0 ) { + /* Write every action in the list. 
*/ + for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( st->id, false, + st->toStateAction->anyNextStmt() ) ); + out << "\n"; + } + } + + if ( st == redFsm->errState ) { + out << "st_case_" << st->id << ":\n"; + + /* Break out here. */ + if ( !redFsm->anyNfaStates() ) + out << "goto " << outLabel[st->id].reference() << ";\n"; + else + out << "goto " << popLabel[st->id].reference() << ";\n"; + } + else { + + /* Advance and test buffer pos. */ + if ( st->labelNeeded ) { + out << + P() << "+= 1;\n"; + } + + /* Give the st a switch case. */ + out << "st_case_" << st->id << ":\n"; + + if ( !noEnd ) { + if ( eof ) { + out << + "if ( " << P() << " == " << PE() << " && " << P() << " != " << vEOF() << " )\n" + " goto " << outLabel[st->id].reference() << ";\n"; + } + else { + out << + "if ( " << P() << " == " << PE() << " )\n" + " goto " << outLabel[st->id].reference() << ";\n"; + } + } + + + NFA_PUSH( st ); + + if ( st->fromStateAction != 0 ) { + /* Write every action in the list. */ + for ( GenActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( st->id, false, + st->fromStateAction->anyNextStmt() ) ); + out << "\n"; + } + } + + if ( !noEnd && eof ) { + out << + "if ( " << P() << " == " << vEOF() << " ) {\n"; + + if ( st->eofTrans != 0 ) + TRANS_GOTO( st->eofTrans ); + else { + if ( st->isFinal || !redFsm->anyNfaStates() ) + out << "goto " << outLabel[st->id].reference() << ";\n"; + else + out << "goto " << popLabel[st->id].reference() << ";\n"; + } + + out << + "}\n" + "else {\n"; + } + + /* Record the prev st if necessary. */ + if ( st->anyRegCurStateRef() ) + out << ps << " = " << st->id << ";\n"; + + /* Try singles. 
*/ + if ( st->outSingle.length() > 0 ) + SINGLE_SWITCH( st ); + + /* Default case is to binary search for the ranges, if that fails then */ + if ( st->outRange.length() > 0 ) { + RANGE_B_SEARCH( st, keyOps->minKey, keyOps->maxKey, + 0, st->outRange.length() - 1 ); + } + + /* Write the default transition. */ + TRANS_GOTO( st->defTrans ) << "\n"; + + if ( !noEnd && eof ) { + out << + "}\n"; + } + } + } + return out; +} + +std::ostream &IpGoto::FINISH_CASES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + out << + "case " << st->id << ":\n"; + + TRANS_GOTO( st->eofTrans ); + } + } + + return out; +} + +void IpGoto::setLabelsNeeded( GenInlineList *inlineList ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case GenInlineItem::Goto: + case GenInlineItem::Call: + case GenInlineItem::Ncall: { + /* Mark the target as needing a label. */ + item->targState->labelNeeded = true; + break; + } + default: break; + } + + if ( item->children != 0 ) + setLabelsNeeded( item->children ); + } +} + +void IpGoto::setLabelsNeeded( RedCondPair *pair ) +{ + /* If there is no action with a next statement, then the label will be + * needed. */ + if ( pair->action == 0 || !pair->action->anyNextStmt() ) + pair->targ->labelNeeded = true; + + /* Need labels for states that have goto or calls in action code + * invoked on characters (ie, not from out action code). */ + if ( pair->action != 0 ) { + /* Loop the actions. */ + for ( GenActionTable::Iter act = pair->action->key; act.lte(); act++ ) { + /* Get the action and walk it's tree. */ + setLabelsNeeded( act->value->inlineList ); + } + } +} + +/* Set up labelNeeded flag for each state. */ +void IpGoto::setLabelsNeeded() +{ + /* If we use the _again label, then we generate the _again switch, which + * uses all labels. 
*/ + if ( useAgainLabel() ) { + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = true; + } + else { + /* Do not use all labels by default, init all labelNeeded vars to false. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = false; + + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + if ( trans->condSpace == 0 ) + setLabelsNeeded( &trans->p ); + } + + for ( CondApSet::Iter cond = redFsm->condSet; cond.lte(); cond++ ) + setLabelsNeeded( &cond->p ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofAction != 0 ) { + for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) + setLabelsNeeded( item->value->inlineList ); + } + } + } +} + +void IpGoto::writeData() +{ + STATE_IDS(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); +} + +void IpGoto::NFA_FROM_STATE_ACTION_EXEC() +{ +// if ( redFsm->anyFromStateActions() ) { +// /* Unimplemented feature. Don't have the from state actions array in +// * this mode. Need to add it, or to alter the NFA pop codegen to be +// * consistent with the mode. */ +// assert( false ); +// } +} + +void IpGoto::writeExec() +{ + int maxCtrId = redFsm->nextCondId > redFsm->nextTransId ? redFsm->nextCondId : redFsm->nextTransId; + + stLabel = allocateLabels( stLabel, IpLabel::St, redFsm->nextStateId ); + ctrLabel = allocateLabels( ctrLabel, IpLabel::Ctr, maxCtrId ); + outLabel = allocateLabels( outLabel, IpLabel::Out, redFsm->nextStateId ); + popLabel = allocateLabels( popLabel, IpLabel::Pop, redFsm->nextStateId ); + + /* Must set labels immediately before writing because we may depend on the + * noend write option. 
*/ + setLabelsNeeded(); + + out << "{\n"; + + DECLARE( INT(), cpc ); + DECLARE( INT(), ck ); + DECLARE( INT(), pop_test ); + DECLARE( INT(), nbreak ); + DECLARE( INT(), ps ); + DECLARE( INT(), new_recs ); + DECLARE( INT(), alt ); + /* The again switch is only written when the _again label has been referenced. */ + if ( _again.isReferenced ) { + out << + " goto " << _resume << ";\n" + "\n"; + + out << EMIT_LABEL( _again ); + + out << + " switch ( " << vCS() << " ) {\n"; + AGAIN_CASES() << + " }\n" + "\n"; + + } + + out << EMIT_LABEL( _resume ); + + out << "switch ( " << vCS() << " ) {\n"; + + STATE_GOTO_CASES(); + + out << "}\n"; + + STATE_GOTOS(); + + EXIT_STATES(); + + out << EMIT_LABEL( _pop ); + /* The pop code after the label is only written for machines that have NFA states. */ + if ( redFsm->anyNfaStates() ) { + out << + "if ( nfa_len == 0 )\n" + " goto " << _out << ";\n" + "\n"; + + out << + "nfa_count += 1;\n" + "nfa_len -= 1;\n" << + P() << " = nfa_bp[nfa_len].p;\n"; + + if ( redFsm->bAnyNfaPops ) { + NFA_FROM_STATE_ACTION_EXEC(); + + NFA_POP_TEST_EXEC(); + /* Generated code restores the saved state when pop_test is nonzero, otherwise it enters the error state. */ + out << + "if ( " << pop_test << " )\n" + " " << vCS() << " = nfa_bp[nfa_len].state;\n" + "else\n" + " " << vCS() << " = " << ERROR_STATE() << ";\n"; + } + else { + out << + vCS() << " = nfa_bp[nfa_len].state;\n"; + + } + + NFA_POST_POP(); + + out << "goto " << _resume << ";\n"; + } + + out << EMIT_LABEL( _out ); + + out << + "}\n"; +} diff --git a/ragel/ipgoto.h b/ragel/ipgoto.h new file mode 100644 index 00000000..c58eb57a --- /dev/null +++ b/ragel/ipgoto.h @@ -0,0 +1,129 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be 
included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IPGOTO_H +#define IPGOTO_H + +#include <iostream> +#include "goto.h" + +/* Forward declarations. */ +struct CodeGenData; + +/* + * class IpGoto: code generator derived from Goto, constructed with the Ip type tag. + */ +class IpGoto + : public Goto +{ +public: + IpGoto( const CodeGenArgs &args ) + : + Goto( args, Ip ), + stLabel(0), + ctrLabel(0), + outLabel(0), + popLabel(0) + {} + /* Section writers; each returns a std::ostream reference. */ + std::ostream &EXIT_STATES(); + std::ostream &TRANS_GOTO( RedTransAp *trans ); + std::ostream &COND_GOTO( RedCondPair *trans ); + std::ostream &FINISH_CASES(); + std::ostream &AGAIN_CASES(); + std::ostream &STATE_GOTOS(); + std::ostream &STATE_GOTO_CASES(); + + /* unused. 
*/ + virtual std::ostream &ACTION_SWITCH() { return out; } + virtual std::ostream &EXEC_FUNCS() { return out; } + virtual std::ostream &TO_STATE_ACTION_SWITCH() { return out; } + virtual std::ostream &FROM_STATE_ACTION_SWITCH() { return out; } + virtual std::ostream &EOF_ACTION_SWITCH() { return out; } + + /* Unused in this code style: the bodies are empty. */ + virtual void FROM_STATE_ACTIONS() {} + virtual void TO_STATE_ACTIONS() {} + virtual void REG_ACTIONS() {} + virtual void EOF_ACTIONS() {} + /* Emitters for inline control-flow items; each receives the stream to write to as ret. NOTE(review): ostream is unqualified here while std::ostream is used above - presumably a using declaration arrives through an included header; confirm. */ + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NCALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NEXT( ostream &ret, int nextDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void RET( ostream &ret, bool inFinish ); + void NRET( ostream &ret, bool inFinish ); + void CURS( ostream &ret, bool inFinish ); + void TARGS( ostream &ret, bool inFinish, int targState ); + void BREAK( ostream &ret, int targState, bool csForced ); + void NBREAK( ostream &ret, int targState, bool csForced ); + /* Analysis entry point and the writers for the data and exec sections. */ + virtual void genAnalysis(); + virtual void writeData(); + virtual void writeExec(); + +protected: + bool useAgainLabel(); + + /* Called from Goto::STATE_GOTOS just before writing the gotos for + * each state. */ + bool IN_TRANS_ACTIONS( RedStateAp *state ); + void GOTO_HEADER( RedStateAp *state ); + void STATE_GOTO_ERROR(); + + /* Set up labelNeeded flag for each state. 
*/ + void setLabelsNeeded( RedCondPair *pair ); + void setLabelsNeeded( GenInlineList *inlineList ); + void setLabelsNeeded(); + + void NFA_PUSH_ACTION( RedNfaTarg *targ ); + void NFA_POP_TEST( RedNfaTarg *targ ); + virtual void NFA_FROM_STATE_ACTION_EXEC(); + + void NFA_PUSH( RedStateAp *state ); + + void tableDataPass(); + /* Label tables: set to 0 by the constructor and assigned from allocateLabels in writeExec. */ + IpLabel *stLabel; + IpLabel *ctrLabel; + IpLabel *outLabel; + IpLabel *popLabel; +}; + /* C variant: a thin subclass whose constructor forwards its argument to ::IpGoto. */ +namespace C +{ + class IpGoto + : + public ::IpGoto + { + public: + IpGoto( const CodeGenArgs &args ) + : ::IpGoto( args ) + {} + }; +} + +#endif diff --git a/ragel/libragel.h b/ragel/libragel.h new file mode 100644 index 00000000..ad328e86 --- /dev/null +++ b/ragel/libragel.h @@ -0,0 +1,32 @@ +/* + * Copyright 2016-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + /* Include guard. NOTE(review): the guard name begins with an underscore followed by a capital letter, a form reserved to the implementation in C and C++. */ +#ifndef _LIBRAGEL_H +#define _LIBRAGEL_H + /* EXTERN_C requests C linkage when compiled as C++ and expands to nothing otherwise. */ +#ifdef __cplusplus +#define EXTERN_C extern "C" +#else +#define EXTERN_C +#endif + +#endif diff --git a/ragel/load.cc b/ragel/load.cc new file mode 100644 index 00000000..47aee0d4 --- /dev/null +++ b/ragel/load.cc @@ -0,0 +1,88 @@ +/* + * Copyright 2015-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "load.h" +#include "ragel.h" +#include "inputdata.h" +#include "parsedata.h" +#include "parsetree.h" + +#include <colm/colm.h> +#include <colm/tree.h> +#include <errno.h> +#include <fstream> + +using std::endl; +using std::ifstream; + +extern colm_sections rlparse_object; + /* Return a buffer allocated with new[] holding the first slen bytes of s with backslash escapes translated, terminated by a zero byte. An unrecognised escape yields the escaped character itself. The buffer is not freed here. NOTE(review): when the last of the slen bytes is a backslash the switch reads s[slen] - confirm every caller passes a buffer that is readable at that index. */ +char *unescape( const char *s, int slen ) +{ + char *out = new char[slen+1]; + char *d = out; + + for ( int i = 0; i < slen; ) { + if ( s[i] == '\\' ) { + switch ( s[i+1] ) { + case '0': *d++ = '\0'; break; + case 'a': *d++ = '\a'; break; + case 'b': *d++ = '\b'; break; + case 't': *d++ = '\t'; break; + case 'n': *d++ = '\n'; break; + case 'v': *d++ = '\v'; break; + case 'f': *d++ = '\f'; break; + case 'r': *d++ = '\r'; break; + default: *d++ = s[i+1]; break; + } + i += 2; + } + else { + *d++ = s[i]; + i += 1; + } + } + *d = 0; + return out; +} + /* Overload for zero-terminated input. NOTE(review): strlen is called but no string header is included directly in this file - presumably it arrives through another include; confirm. */ +char *unescape( const char *s ) +{ + return unescape( s, strlen(s) ); +} + /* Copy name, line and column from a colm location. A null location gives line and col of -1, and a null file name is replaced by a single dash. */ +InputLoc::InputLoc( colm_location *pcloc ) +{ + if ( pcloc != 0 ) { + fileName = pcloc->name; + line = pcloc->line; + col = pcloc->column; + } + else { + fileName = 0; + line = -1; + col = -1; + } + + if ( fileName == 0 ) + fileName = "-"; +} diff --git a/ragel/load.h b/ragel/load.h new file mode 100644 index 00000000..6ef7d57c --- /dev/null +++ b/ragel/load.h @@ -0,0 +1,37 @@ +/* + * Copyright 2013-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + /* Include guard. NOTE(review): the guard name begins with an underscore followed by a capital letter, a form reserved to the implementation in C and C++. */ +#ifndef _LOAD_H +#define _LOAD_H + +#include "ragel.h" + /* Forward declarations; this header only uses these types through pointers and references. */ +struct LoadRagel; +struct InputData; +struct HostLang; + /* Loader lifecycle functions (presumably create, run on one input file, destroy - inferred from the names; confirm against the definitions). */ +LoadRagel *newLoadRagel( InputData &id, const HostLang *hostLang, + MinimizeLevel minimizeLevel, MinimizeOpt minimizeOpt ); +void loadRagel( LoadRagel *lr, const char *inputFileName ); +void deleteLoadRagel( LoadRagel * ); + +#endif diff --git a/ragel/longest.cc b/ragel/longest.cc new file mode 100644 index 00000000..bf1b2a54 --- /dev/null +++ b/ragel/longest.cc @@ -0,0 +1,571 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <iostream> +#include <iomanip> +#include <sstream> +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <inputdata.h> + /* NOTE(review): inputdata.h is included with angle brackets above, but with quotes in the other files of this patch. */ +/* Parsing. */ +#include "ragel.h" +#include "parsetree.h" +#include "parsedata.h" + /* Build the longest-match scanner on graph: fill the per-state item sets, then embed the token start, token end, act id and error actions. */ +void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph ) +{ + /* Seed: every state marked by the walk from the start state receives the null item. */ graph->markReachableFromHereStopFinal( graph->startState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & STB_ISMARKED ) { + ms->lmItemSet.insert( 0 ); + ms->stateBits &= ~ STB_ISMARKED; + } + } + + /* Transfer the first item of non-empty lmAction tables to the item sets + * of the states that follow. Exclude states that have no transitions out. + * This must happen on a separate pass so that on each iteration of the + * next pass we have the item set entries from all lmAction tables. */ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + TransDataAp *tdap = trans->tdap(); + if ( tdap->lmActionTable.length() > 0 ) { + LmActionTableEl *lmAct = tdap->lmActionTable.data; + StateAp *toState = tdap->toState; + assert( toState ); + + /* Can only optimize this if there are no transitions out. + * Note there can be out transitions going nowhere with + * actions and they too must inhibit this optimization. + * In this pass the test is the inverse: item sets are only + * filled when the target does have transitions out. */ + if ( toState->outList.length() > 0 ) { + /* Fill the item sets. 
*/ + graph->markReachableFromHereStopFinal( toState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & STB_ISMARKED ) { + ms->lmItemSet.insert( lmAct->value ); + ms->stateBits &= ~ STB_ISMARKED; + } + } + } + } + } + else { + /* Same handling for each condition of a conditional transition. */ for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->lmActionTable.length() > 0 ) { + + LmActionTableEl *lmAct = cond->lmActionTable.data; + StateAp *toState = cond->toState; + assert( toState ); + + /* Can only optimize this if there are no transitions out. + * Note there can be out transitions going nowhere with + * actions and they too must inhibit this optimization. + * In this pass the test is the inverse: item sets are only + * filled when the target does have transitions out. */ + if ( toState->outList.length() > 0 ) { + /* Fill the item sets. */ + graph->markReachableFromHereStopFinal( toState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & STB_ISMARKED ) { + ms->lmItemSet.insert( lmAct->value ); + ms->stateBits &= ~ STB_ISMARKED; + } + } + } + } + } + } + } + } + + /* The lmItem sets are now filled, telling us which longest match rules + * can succeed in which states. First determine if we need to make sure + * act is defaulted to zero. We need to do this if there are any states + * with lmItemSet.length() > 1 and NULL is included. That is, that the + * switch may get called when in fact nothing has been matched. */ + int maxItemSetLength = 0; + graph->markReachableFromHereStopFinal( graph->startState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & STB_ISMARKED ) { + if ( ms->lmItemSet.length() > maxItemSetLength ) + maxItemSetLength = ms->lmItemSet.length(); + ms->stateBits &= ~ STB_ISMARKED; + } + } + + /* The actions executed on starting to match a token. 
*/ + FsmRes res = FsmAp::isolateStartState( graph ); + /* NOTE(review): the result is used without a success() check here, unlike the same call in buildBaseNfa. */ graph = res.fsm; + graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart ); + graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart ); + if ( maxItemSetLength > 1 ) { + /* The longest match action switch may be called when tokens are + * matched, in which case act must be initialized, there must be a + * case to handle the error, and the generated machine will require an + * error state. NOTE(review): this likely means when no token has been + * matched, going by the comment above maxItemSetLength - confirm. */ + lmSwitchHandlesError = true; + pd->fsmCtx->lmRequiresErrorState = true; + graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId ); + } + + /* The place to store transitions to restart. It may be possible for the + * restarting to affect the searching through the graph that follows. For + * now take the safe route and save the list of transitions to restart + * until after all searching is done. */ + Vector<TransAp*> restartData; + Vector<CondAp*> restartCond; + + /* Set actions that do immediate token recognition, set the longest match part + * id and set the token ending. */ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + TransDataAp *tdap = trans->tdap(); + if ( tdap->lmActionTable.length() > 0 ) { + LmActionTableEl *lmAct = tdap->lmActionTable.data; + StateAp *toState = tdap->toState; + assert( toState ); + + /* Can only optimize this if there are no transitions out. + * Note there can be out transitions going nowhere with + * actions and they too must inhibit this optimization. */ + if ( toState->outList.length() == 0 ) { + /* Can execute the immediate action for the longest match + * part. Redirect the action to the start state. + * + * NOTE: When we need to inhibit on_last due to leaving + * actions the above test suffices. 
If the state has out + * actions then it will fail because the out action will + * have been transferred to an error transition, which + * makes the outlist non-empty. */ + tdap->actionTable.setAction( lmAct->key, + lmAct->value->actOnLast ); + restartData.append( trans ); + } + else { + /* Look for non final states that have a non-empty item + * set. If these are present then we need to record the + * end of the token. Also find the highest item set + * length reachable from here (excluding transitions to + * final states). */ + bool nonFinalNonEmptyItemSet = false; + maxItemSetLength = 0; + graph->markReachableFromHereStopFinal( toState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & STB_ISMARKED ) { + if ( ms->lmItemSet.length() > 0 && !ms->isFinState() ) + nonFinalNonEmptyItemSet = true; + if ( ms->lmItemSet.length() > maxItemSetLength ) + maxItemSetLength = ms->lmItemSet.length(); + ms->stateBits &= ~ STB_ISMARKED; + } + } + + /* If there are reachable states that are not final and + * have non empty item sets or that have an item set + * length greater than one then we need to set tokend + * because the error action that matches the token will + * require it. */ + if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 ) + tdap->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd ); + + /* Some states may not know which longest match item to + * execute, must set it. */ + if ( maxItemSetLength > 1 ) { + /* There are transitions out, another match may come. */ + tdap->actionTable.setAction( lmAct->key, + lmAct->value->setActId ); + } + } + } + } + else { + /* Same handling for each condition of a conditional transition. */ for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->lmActionTable.length() > 0 ) { + LmActionTableEl *lmAct = cond->lmActionTable.data; + StateAp *toState = cond->toState; + assert( toState ); + + /* Can only optimize this if there are no transitions out. 
+ * Note there can be out transitions going nowhere with + * actions and they too must inhibit this optimization. */ + if ( toState->outList.length() == 0 ) { + /* Can execute the immediate action for the longest match + * part. Redirect the action to the start state. + * + * NOTE: When we need to inhibit on_last due to leaving + * actions the above test suffices. If the state has out + * actions then it will fail because the out action will + * have been transferred to an error transition, which + * makes the outlist non-empty. */ + cond->actionTable.setAction( lmAct->key, + lmAct->value->actOnLast ); + restartCond.append( cond ); + } + else { + /* Look for non final states that have a non-empty item + * set. If these are present then we need to record the + * end of the token. Also find the highest item set + * length reachable from here (excluding transitions to + * final states). */ + bool nonFinalNonEmptyItemSet = false; + maxItemSetLength = 0; + graph->markReachableFromHereStopFinal( toState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & STB_ISMARKED ) { + if ( ms->lmItemSet.length() > 0 && !ms->isFinState() ) + nonFinalNonEmptyItemSet = true; + if ( ms->lmItemSet.length() > maxItemSetLength ) + maxItemSetLength = ms->lmItemSet.length(); + ms->stateBits &= ~ STB_ISMARKED; + } + } + + /* If there are reachable states that are not final and + * have non empty item sets or that have an item set + * length greater than one then we need to set tokend + * because the error action that matches the token will + * require it. */ + if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 ) + cond->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd ); + + /* Some states may not know which longest match item to + * execute, must set it. */ + if ( maxItemSetLength > 1 ) { + /* There are transitions out, another match may come. 
*/ + cond->actionTable.setAction( lmAct->key, lmAct->value->setActId ); + } + } + } + } + } + } + } + + /* Now that all graph searching is done it is certainly safe to set the + * restarting. It may be safe above, however this must be verified. */ + for ( Vector<TransAp*>::Iter pt = restartData; pt.lte(); pt++ ) + restart( graph, *pt ); + + for ( Vector<CondAp*>::Iter pt = restartCond; pt.lte(); pt++ ) + restart( graph, *pt ); + /* One ordering value is taken here and shared by every error action embedded below. */ + int lmErrActionOrd = pd->fsmCtx->curActionOrd++; + + /* Embed the error for recognizing a char. */ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) { + if ( st->isFinState() ) { + /* On error execute the actOnNext action, which knows that + * the last character of the token was one back and restart. */ + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &st->lmItemSet[0]->actOnNext, 1 ); + st->eofActionTable.setAction( lmErrActionOrd, + st->lmItemSet[0]->actOnNext ); + st->eofTarget = graph->startState; + } + else { + /* Non-final state: the actLagBehind action is used instead. */ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &st->lmItemSet[0]->actLagBehind, 1 ); + st->eofActionTable.setAction( lmErrActionOrd, + st->lmItemSet[0]->actLagBehind ); + st->eofTarget = graph->startState; + } + } + else if ( st->lmItemSet.length() > 1 ) { + /* Need to use the select. Take note of which items the select + * is needed for so only the necessary actions are included. */ + for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) { + if ( *plmi != 0 ) + (*plmi)->inLmSelect = true; + } + /* On error, execute the action select and go to the start state. */ + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &lmActSelect, 1 ); + st->eofActionTable.setAction( lmErrActionOrd, lmActSelect ); + st->eofTarget = graph->startState; + } + } + + /* Finally, the start state should be made final. 
*/ + graph->setFinState( graph->startState ); +} + +/* Build the individual machines, setting up the NFA transitions to final + * states as we go. This is the base, unoptimized configuration. Later on we + * look to eliminate NFA transitions. Return the union of all machines. */ +FsmRes LongestMatch::buildBaseNfa( ParseData *pd ) +{ + int nfaOrder = 1; + FsmAp **parts = new FsmAp*[longestMatchList->length()]; + + /* Make each part of the longest match. */ + LmPartList::Iter lmi = longestMatchList->last(); + for ( int i = longestMatchList->length() - 1; lmi.gtb(); lmi--, i-- ) { + /* Create the machine and embed the setting of the longest match id. */ + FsmRes res = lmi->join->walk( pd ); + if ( !res.success() ) + return res; + /* NOTE(review): the return above leaves the parts array, allocated with new[] at the top of this function, unreleased. */ + parts[i] = res.fsm; + /* Iterate over a copy, because the loop body changes the final state set of the part. */ + StateSet origFin = parts[i]->finStateSet; + for ( StateSet::Iter fin = origFin; fin.lte(); fin++ ) { + StateAp *orig = *fin; + StateAp *newFinal = parts[i]->addState(); + + newFinal->lmNfaParts.insert( lmi ); + + NfaTrans *trans = new NfaTrans( nfaOrder++ ); + if ( orig->nfaOut == 0 ) + orig->nfaOut = new NfaTransList; + orig->nfaOut->append( trans ); + parts[i]->attachToNfa( orig, newFinal, trans ); + /* Move the out priorities, out actions and out conditions from the old final state to the new one. */ + if ( orig->outPriorTable.length() > 0 ) { + newFinal->outPriorTable.insert( orig->outPriorTable ); + orig->outPriorTable.empty(); + } + if ( orig->outActionTable.length() > 0 ) { + newFinal->outActionTable.insert( orig->outActionTable ); + orig->outActionTable.empty(); + } + if ( orig->outCondSpace != 0 ) { + newFinal->outCondSpace = orig->outCondSpace; + newFinal->outCondKeys.insert( orig->outCondKeys ); + orig->outCondSpace = 0; + orig->outCondKeys.empty(); + } + + parts[i]->unsetFinState( orig ); + parts[i]->setFinState( newFinal ); + } + } + + /* Union machines one and up with machine zero. The grammar dictates that + * there will always be at least one part. 
*/ + FsmRes fsm( FsmRes::Fsm(), parts[0] ); + for ( int i = 1; i < longestMatchList->length(); i++ ) { + fsm = FsmAp::unionOp( fsm, parts[i] ); + if ( !fsm.success() ) + return fsm; + } + + /* Create a new, isolated start state into which we can embed tokstart + * functions. */ + fsm = FsmAp::isolateStartState( fsm ); + if ( !fsm.success() ) + return fsm; + + fsm->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart ); + fsm->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart ); + + KeyOps *keyOps = pd->fsmCtx->keyOps; + + /* Draw the transition back to the start state. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + if ( st->lmNfaParts.length() > 0 ) { + assert( st->lmNfaParts.length() == 1 ); + + /*TransAp *newTrans = */fsm->attachNewTrans( st, + fsm->startState, keyOps->minKey, keyOps->maxKey ); + + fsm->transferOutData( st, st ); + if ( st->outCondSpace != 0 ) + FsmAp::embedCondition( fsm, st, st->outCondSpace->condSet, st->outCondKeys ); + /* Every out transition, and every condition of a conditional one, gets the actNfaOnNext action of the part. */ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) + trans->tdap()->actionTable.setAction( pd->fsmCtx->curActionOrd++, st->lmNfaParts[0]->actNfaOnNext ); + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) + cond->actionTable.setAction( pd->fsmCtx->curActionOrd++, st->lmNfaParts[0]->actNfaOnNext ); + } + } + + st->eofActionTable.setAction( pd->fsmCtx->curActionOrd++, st->lmNfaParts[0]->actNfaOnEof ); + } + } + /* NOTE(review): the early returns on failure above do not reach this delete[]. */ + delete[] parts; + return fsm; +} + /* A match is treated as able to fail only when the state has an out condition space; pd and fsm are not used. */ +bool LongestMatch::matchCanFail( ParseData *pd, FsmAp *fsm, StateAp *st ) +{ + if ( st->outCondSpace != 0 ) + return true; + + return false; +} + + +void LongestMatch::eliminateNfaActions( ParseData *pd, FsmAp *fsm ) +{ + /* + * Once the union is complete we can optimize by advancing actions so they + * happen sooner, then draw the final transitions back to the start state. 
+ * First step is to remove epsilon transitions that will never be taken. + */ + bool modified = true; + while ( modified ) { + modified = false; + + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + /* Check if the nfa parts list is non-empty (meaning we have a final + * state created for matching a pattern). */ + if ( st->lmNfaParts.length() > 0 && st->nfaIn != 0 ) { + /* Check if it can fail. If it can fail, then we cannot + * eliminate the prior candidates. If it can't fail then it is + * acceptable to eliminate the prior NFA transitions because we + * will never backtrack to follow them. */ + if ( matchCanFail( pd, fsm, st ) ) + continue; + + for ( NfaInList::Iter in = *st->nfaIn; in.lte(); in++ ) { + StateAp *fromState = in->fromState; + /* Go forward until we get to the in-transition that cannot + * fail. Stop there because we are interested in what's + * before. */ + for ( NfaTransList::Iter to = *fromState->nfaOut; to.lte(); to++ ) { + if ( to->order < in->order ) { + /* Can nuke the epsilon transition that we will never + * follow. */ + fsm->detachFromNfa( fromState, to->toState, to ); + fromState->nfaOut->detach( to ); + delete to; + + modified = true; + goto restart; + } + } + } + } + } + /* After each removal the scan starts over from the head of the state list through the enclosing while loop. */ + restart: {} + } +} + /* True when in is the sole entry of the NFA out list of st; pd and fsm are not used. */ +bool LongestMatch::onlyOneNfa( ParseData *pd, FsmAp *fsm, StateAp *st, NfaTrans *in ) +{ + if ( st->nfaOut != 0 && st->nfaOut->length() == 1 && st->nfaOut->head == in ) + return true; + return false; +} + +/* Advance NFA actions to the final character of the pattern match. This only + * works when the machine cannot move forward more. */ +void LongestMatch::advanceNfaActions( ParseData *pd, FsmAp *fsm ) +{ + /* + * Advance actions to the final transition of the pattern match. + */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + /* IS OUT COND SPACE ALL? */ + if ( st->lmNfaParts.length() > 0 && st->nfaIn != 0 ) { + /* Only concern ourselves with final states that cannot fail. 
*/ + if ( matchCanFail( pd, fsm, st ) ) + continue; + + /* If there are any out actions we cannot advance because we need + * to execute on the following character. We cannot move to on-last, + * but in the next pass maybe we can eliminate the NFA action and + * move on leaving. */ + if ( st->outActionTable.length() > 0 ) + continue; + + for ( NfaInList::Iter in = *st->nfaIn; in.lte(); in++ ) { + + StateAp *fromState = in->fromState; + if ( !fsm->anyRegularTransitions( fromState ) && + onlyOneNfa( pd, fsm, fromState, in ) ) + { + /* Can advance: put the on-last action on every transition and condition entering fromState, then hand them to moveInwardTrans (presumably retargeting them onto the start state). */ + for ( TransInList::Iter t = fromState->inTrans; t.lte(); t++ ) { + t->actionTable.setAction( pd->fsmCtx->curActionOrd++, + st->lmNfaParts[0]->actNfaOnLast ); + } + for ( CondInList::Iter t = fromState->inCond; t.lte(); t++ ) { + t->actionTable.setAction( pd->fsmCtx->curActionOrd++, + st->lmNfaParts[0]->actNfaOnLast ); + } + + fsm->moveInwardTrans( fsm->startState, fromState ); + } + } + } + } +} + + +FsmRes LongestMatch::mergeNfaStates( ParseData *pd, FsmAp *fsm ) +{ +again: + /* + * Merge pass: apply each NFA transition that is the only exit of its source state, then rescan from the top. + */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + /* IS OUT COND SPACE ALL? */ + if ( st->lmNfaParts.length() > 0 && st->nfaIn != 0 ) { + /* Only concern ourselves with final states that cannot fail. */ + if ( matchCanFail( pd, fsm, st ) ) + continue; + + for ( NfaInList::Iter in = *st->nfaIn; in.lte(); in++ ) { + + StateAp *fromState = in->fromState; + if ( !fsm->anyRegularTransitions( fromState ) && + onlyOneNfa( pd, fsm, fromState, in ) ) + { + /* Can apply the NFA transition, eliminating it. */ + FsmAp::applyNfaTrans( fsm, fromState, st, fromState->nfaOut->head ); + goto again; + } + } + } + } + + return FsmRes( FsmRes::Fsm(), fsm ); +} + +FsmRes LongestMatch::walkNfa( ParseData *pd ) +{ + /* The longest match has its own name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Build the machines. 
*/ + FsmRes fsm = buildBaseNfa( pd ); + if ( !fsm.success() ) + return fsm; + /* NOTE(review): the failure return above happens before the popNameScope call below - confirm the name scope does not need popping on that path. */ + /* Optimization passes. */ + eliminateNfaActions( pd, fsm ); + advanceNfaActions( pd, fsm ); + fsm = mergeNfaStates( pd, fsm ); + + /* Pop the name scope. */ + pd->popNameScope( nameFrame ); + + return fsm; +} diff --git a/ragel/main.cc b/ragel/main.cc new file mode 100644 index 00000000..e9c5db39 --- /dev/null +++ b/ragel/main.cc @@ -0,0 +1,31 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "inputdata.h" + +extern struct colm_sections rlparseC; + +int main( int argc, const char **argv ) +{ + InputData id( &hostLangC, &rlparseC, 0 ); + return id.main( argc, argv ); +} diff --git a/ragel/parsedata.cc b/ragel/parsedata.cc new file mode 100644 index 00000000..d3474684 --- /dev/null +++ b/ragel/parsedata.cc @@ -0,0 +1,1490 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <iostream> +#include <iomanip> +#include <errno.h> +#include <stdlib.h> +#include <limits.h> + +#include "ragel.h" +#include "parsedata.h" +#include "parsetree.h" +#include "mergesort.h" +#include "version.h" +#include "inputdata.h" +#include <colm/tree.h> + +using namespace std; + +const char mainMachine[] = "main"; + +void Token::_set( const char *str, int len ) +{ + length = len; + data = new char[len+1]; + memcpy( data, str, len ); + data[len] = 0; +} + +void Token::set( const char *str, int len, colm_location *cl ) +{ + _set( str, len ); + + if ( cl != 0 ) { + loc.fileName = cl->name; + loc.line = cl->line; + loc.col = cl->column; + } +} + +void Token::set( colm_data *cd, colm_location *cl ) +{ + set( cd->data, cd->length, cl ); +} + +void Token::set( const char *str, int len, const InputLoc &l ) +{ + _set( str, len ); + + loc.fileName = l.fileName; + loc.line = l.line; + loc.col = l.col; +} + +void Token::set( const char *str, int len, const ParserLoc &l ) +{ + _set( str, len ); + loc = l; +} + +void RedToken::set( colm_data *cd, colm_location *cl ) +{ + data = cd->data; + length = cd->length; + loc.fileName = cl->name; + loc.line = cl->line; + loc.col = cl->column; +} + +/* Count the transitions in the fsm by walking the state list. */ +int countTransitions( FsmAp *fsm ) +{ + int numTrans = 0; + StateAp *state = fsm->stateList.head; + while ( state != 0 ) { + numTrans += state->outList.length(); + state = state->next; + } + return numTrans; +} + +Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd ) +{ + /* Reset errno so we can check for overflow or underflow. In the event of + * an error, sets the return val to the upper or lower bound being tested + * against. 
*/ + errno = 0; + unsigned int size = pd->alphType->size; + bool unusedBits = size < sizeof(unsigned long); + + unsigned long ul = strtoul( str, 0, 16 ); + + if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) { + pd->id->error(loc) << "literal " << str << " overflows the alphabet type" << endl; + ul = 1 << (size * 8); + } + + if ( unusedBits && pd->alphType->isSigned && ul >> (size * 8 - 1) ) + ul |= ( -1L >> (size*8) ) << (size*8); + + return Key( (long)ul ); +} + +Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ) +{ + if ( pd->alphType->isSigned ) { + /* Convert the number to a decimal. First reset errno so we can check + * for overflow or underflow. */ + errno = 0; + long long minVal = pd->alphType->sMinVal; + long long maxVal = pd->alphType->sMaxVal; + + long long ll = strtoll( str, 0, 10 ); + + /* Check for underflow. */ + if ( ( errno == ERANGE && ll < 0 ) || ll < minVal ) { + pd->id->error(loc) << "literal " << str << " underflows the alphabet type" << endl; + ll = minVal; + } + /* Check for overflow. */ + else if ( ( errno == ERANGE && ll > 0 ) || ll > maxVal ) { + pd->id->error(loc) << "literal " << str << " overflows the alphabet type" << endl; + ll = maxVal; + } + + return Key( (long)ll ); + } + else { + /* Convert the number to a decimal. First reset errno so we can check + * for overflow or underflow. */ + errno = 0; + unsigned long long minVal = pd->alphType->uMinVal; + unsigned long long maxVal = pd->alphType->uMaxVal; + + unsigned long long ull = strtoull( str, 0, 10 ); + + /* Check for underflow. */ + if ( ( errno == ERANGE && ull < 0 ) || ull < minVal ) { + pd->id->error(loc) << "literal " << str << " underflows the alphabet type" << endl; + ull = minVal; + } + /* Check for overflow. 
*/ + else if ( ( errno == ERANGE && ull > 0 ) || ull > maxVal ) { + pd->id->error(loc) << "literal " << str << " overflows the alphabet type" << endl; + ull = maxVal; + } + + return Key( (unsigned long)ull ); + } +} + +/* Make an fsm key in int format (what the fsm graph uses) from an alphabet + * number returned by the parser. Validates that the number doesn't overflow + * the alphabet type. */ +Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd ) +{ + /* Switch on hex/decimal format. */ + if ( str[0] == '0' && str[1] == 'x' ) + return makeFsmKeyHex( str, loc, pd ); + else + return makeFsmKeyDec( str, loc, pd ); +} + +/* Make an fsm int format (what the fsm graph uses) from a single character. + * Performs proper conversion depending on signed/unsigned property of the + * alphabet. */ +Key makeFsmKeyChar( char c, ParseData *pd ) +{ + if ( pd->fsmCtx->keyOps->isSigned ) { + /* Copy from a char type. */ + return Key( c ); + } + else { + /* Copy from an unsigned byte type. */ + return Key( (unsigned char)c ); + } +} + +/* Make an fsm key array in int format (what the fsm graph uses) from a string + * of characters. Performs proper conversion depending on signed/unsigned + * property of the alphabet. */ +void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd ) +{ + if ( pd->fsmCtx->keyOps->isSigned ) { + /* Copy from a char star type. */ + char *src = data; + for ( int i = 0; i < len; i++ ) + result[i] = Key(src[i]); + } + else { + /* Copy from an unsigned byte ptr type. */ + unsigned char *src = (unsigned char*) data; + for ( int i = 0; i < len; i++ ) + result[i] = Key(src[i]); + } +} + +/* Like makeFsmKeyArray except the result has only unique keys. They ordering + * will be changed. */ +void makeFsmUniqueKeyArray( KeySet &result, const char *data, int len, + bool caseInsensitive, ParseData *pd ) +{ + /* Use a transitions list for getting unique keys. */ + if ( pd->fsmCtx->keyOps->isSigned ) { + /* Copy from a char star type. 
*/ + const char *src = data; + for ( int si = 0; si < len; si++ ) { + Key key( src[si] ); + result.insert( key ); + if ( caseInsensitive ) { + if ( key.isLower() ) + result.insert( key.toUpper() ); + else if ( key.isUpper() ) + result.insert( key.toLower() ); + } + } + } + else { + /* Copy from an unsigned byte ptr type. */ + const unsigned char *src = (unsigned char*) data; + for ( int si = 0; si < len; si++ ) { + Key key( src[si] ); + result.insert( key ); + if ( caseInsensitive ) { + if ( key.isLower() ) + result.insert( key.toUpper() ); + else if ( key.isUpper() ) + result.insert( key.toLower() ); + } + } + } +} + +/* Make a builtin type. Depends on the signed nature of the alphabet type. */ +FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd ) +{ + /* FsmAp created to return. */ + FsmAp *retFsm = 0; + bool isSigned = pd->fsmCtx->keyOps->isSigned; + + switch ( builtin ) { + case BT_Any: { + /* All characters. */ + retFsm = FsmAp::dotFsm( pd->fsmCtx ); + break; + } + case BT_Ascii: { + /* Ascii characters 0 to 127. */ + retFsm = FsmAp::rangeFsm( pd->fsmCtx, 0, 127 ); + break; + } + case BT_Extend: { + /* Ascii extended characters. This is the full byte range. Dependent + * on signed, vs no signed. If the alphabet is one byte then just use + * dot fsm. */ + if ( isSigned ) + retFsm = FsmAp::rangeFsm( pd->fsmCtx, -128, 127 ); + else + retFsm = FsmAp::rangeFsm( pd->fsmCtx, 0, 255 ); + break; + } + case BT_Alpha: { + /* Alpha [A-Za-z]. */ + FsmAp *upper = FsmAp::rangeFsm( pd->fsmCtx, 'A', 'Z' ); + FsmAp *lower = FsmAp::rangeFsm( pd->fsmCtx, 'a', 'z' ); + FsmRes res = FsmAp::unionOp( upper, lower ); + upper = res.fsm; + upper->minimizePartition2(); + retFsm = upper; + break; + } + case BT_Digit: { + /* Digits [0-9]. */ + retFsm = FsmAp::rangeFsm( pd->fsmCtx, '0', '9' ); + break; + } + case BT_Alnum: { + /* Alpha numerics [0-9A-Za-z]. 
*/ + FsmAp *digit = FsmAp::rangeFsm( pd->fsmCtx, '0', '9' ); + FsmAp *upper = FsmAp::rangeFsm( pd->fsmCtx, 'A', 'Z' ); + FsmAp *lower = FsmAp::rangeFsm( pd->fsmCtx, 'a', 'z' ); + FsmRes res1 = FsmAp::unionOp( digit, upper ); + digit = res1.fsm; + FsmRes res2 = FsmAp::unionOp( digit, lower ); + digit = res2.fsm; + digit->minimizePartition2(); + retFsm = digit; + break; + } + case BT_Lower: { + /* Lower case characters. */ + retFsm = FsmAp::rangeFsm( pd->fsmCtx, 'a', 'z' ); + break; + } + case BT_Upper: { + /* Upper case characters. */ + retFsm = FsmAp::rangeFsm( pd->fsmCtx, 'A', 'Z' ); + break; + } + case BT_Cntrl: { + /* Control characters. */ + FsmAp *cntrl = FsmAp::rangeFsm( pd->fsmCtx, 0, 31 ); + FsmAp *highChar = FsmAp::concatFsm( pd->fsmCtx, 127 ); + FsmRes res = FsmAp::unionOp( cntrl, highChar ); + cntrl = res.fsm; + cntrl->minimizePartition2(); + retFsm = cntrl; + break; + } + case BT_Graph: { + /* Graphical ascii characters [!-~]. */ + retFsm = FsmAp::rangeFsm( pd->fsmCtx, '!', '~' ); + break; + } + case BT_Print: { + /* Printable characters. Same as graph except includes space. */ + retFsm = FsmAp::rangeFsm( pd->fsmCtx, ' ', '~' ); + break; + } + case BT_Punct: { + /* Punctuation. */ + FsmAp *range1 = FsmAp::rangeFsm( pd->fsmCtx, '!', '/' ); + FsmAp *range2 = FsmAp::rangeFsm( pd->fsmCtx, ':', '@' ); + FsmAp *range3 = FsmAp::rangeFsm( pd->fsmCtx, '[', '`' ); + FsmAp *range4 = FsmAp::rangeFsm( pd->fsmCtx, '{', '~' ); + + FsmRes res1 = FsmAp::unionOp( range1, range2 ); + range1 = res1.fsm; + FsmRes res2 = FsmAp::unionOp( range1, range3 ); + range1 = res2.fsm; + FsmRes res3 = FsmAp::unionOp( range1, range4 ); + range1 = res3.fsm; + range1->minimizePartition2(); + retFsm = range1; + break; + } + case BT_Space: { + /* Whitespace: [\t\v\f\n\r ]. 
*/ + FsmAp *cntrl = FsmAp::rangeFsm( pd->fsmCtx, '\t', '\r' ); + FsmAp *space = FsmAp::concatFsm( pd->fsmCtx, ' ' ); + FsmRes res = FsmAp::unionOp( cntrl, space ); + cntrl = res.fsm; + cntrl->minimizePartition2(); + retFsm = cntrl; + break; + } + case BT_Xdigit: { + /* Hex digits [0-9A-Fa-f]. */ + FsmAp *digit = FsmAp::rangeFsm( pd->fsmCtx, '0', '9' ); + FsmAp *upper = FsmAp::rangeFsm( pd->fsmCtx, 'A', 'F' ); + FsmAp *lower = FsmAp::rangeFsm( pd->fsmCtx, 'a', 'f' ); + + FsmRes res1 = FsmAp::unionOp( digit, upper ); + digit = res1.fsm; + FsmRes res2 = FsmAp::unionOp( digit, lower ); + digit = res2.fsm; + + digit->minimizePartition2(); + retFsm = digit; + break; + } + case BT_Lambda: { + retFsm = FsmAp::lambdaFsm( pd->fsmCtx ); + break; + } + case BT_Empty: { + retFsm = FsmAp::emptyFsm( pd->fsmCtx ); + break; + }} + + return retFsm; +} + +/* Check if this name inst or any name inst below is referenced. */ +bool NameInst::anyRefsRec() +{ + if ( numRefs > 0 ) + return true; + + /* Recurse on children until true. */ + for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) { + if ( (*ch)->anyRefsRec() ) + return true; + } + + return false; +} + +NameInst::~NameInst() +{ + /* Recurse on the implicit final state and then all children. */ + if ( final != 0 ) + delete final; + for ( NameVect::Iter name = childVect; name.lte(); name++ ) + delete *name; +} + +/* + * ParseData + */ + +/* Initialize the structure that will collect info during the parse of a + * machine. */ +ParseData::ParseData( InputData *id, string sectionName, + int machineId, const InputLoc §ionLoc, const HostLang *hostLang, + MinimizeLevel minimizeLevel, MinimizeOpt minimizeOpt ) +: + sectionName(sectionName), + sectionGraph(0), + /* 0 is reserved for global error actions. 
*/ + nextLocalErrKey(1), + nextNameId(0), + alphTypeSet(false), + lowerNum(0), + upperNum(0), + id(id), + machineId(machineId), + sectionLoc(sectionLoc), + rootName(0), + exportsRootName(0), + nextEpsilonResolvedLink(0), + nextLongestMatchId(1), + nextRepId(1), + cgd(0) +{ + fsmCtx = new FsmCtx( id ); + + /* Initialize the dictionary of graphs. This is our symbol table. The + * initialization needs to be done on construction which happens at the + * beginning of a machine spec so any assignment operators can reference + * the builtins. */ + initGraphDict(); + +} + +/* Clean up the data collected during a parse. */ +ParseData::~ParseData() +{ + graphDict.empty(); + fsmCtx->actionList.empty(); + + if ( fsmCtx->nameIndex != 0 ) + delete[] fsmCtx->nameIndex; + + if ( rootName != 0 ) + delete rootName; + if ( exportsRootName != 0 ) + delete exportsRootName; + + delete fsmCtx; +} + +ifstream *InputData::tryOpenInclude( const char **pathChecks, long &found ) +{ + const char **check = pathChecks; + ifstream *inFile = new ifstream; + + while ( *check != 0 ) { + inFile->open( *check ); + if ( inFile->is_open() ) { + found = check - pathChecks; + return inFile; + } + + /* + * 03/26/2011 jg: + * Don't rely on sloppy runtime behaviour: reset the state of the stream explicitly. + * If inFile->open() fails, which happens when include dirs are tested, the fail bit + * is set by the runtime library. Currently the VS runtime library opens new files, + * but when it comes to reading it refuses to work. 
+ */ + inFile->clear(); + + check += 1; + } + + found = -1; + delete inFile; + return 0; +} + +bool isAbsolutePath( const char *path ) +{ +#ifdef _WIN32 + return isalpha( path[0] ) && path[1] == ':' && path[2] == '\\'; +#else + return path[0] == '/'; +#endif +} + +#ifdef _WIN32 +#define PATH_SEP '\\' +#else +#define PATH_SEP '/' +#endif + + +const char **InputData::makeIncludePathChecks( const char *thisFileName, const char *data ) +{ + const char **checks = 0; + long nextCheck = 0; + int length = strlen(data); + + /* Absolute path? */ + if ( isAbsolutePath( data ) ) { + checks = new const char*[2]; + checks[nextCheck++] = data; + } + else { + checks = new const char*[2 + includePaths.length()]; + + /* Search from the the location of the current file. */ + const char *lastSlash = strrchr( thisFileName, PATH_SEP ); + if ( lastSlash == 0 ) + checks[nextCheck++] = data; + else { + long givenPathLen = (lastSlash - thisFileName) + 1; + long checklen = givenPathLen + length; + char *check = new char[checklen+1]; + memcpy( check, thisFileName, givenPathLen ); + memcpy( check+givenPathLen, data, length ); + check[checklen] = 0; + checks[nextCheck++] = check; + } + + /* Search from the include paths given on the command line. */ + for ( ArgsVector::Iter incp = includePaths; incp.lte(); incp++ ) { + long pathLen = strlen( *incp ); + long checkLen = pathLen + 1 + length; + char *check = new char[checkLen+1]; + memcpy( check, *incp, pathLen ); + check[pathLen] = PATH_SEP; + memcpy( check+pathLen+1, data, length ); + check[checkLen] = 0; + checks[nextCheck++] = check; + } + } + + checks[nextCheck] = 0; + return checks; +} + + +/* An approximate check for duplicate includes. Due to aliasing of files it's + * possible for duplicates to creep in. 
*/ +bool ParseData::duplicateInclude( const char *inclFileName, const char *inclSectionName ) +{ + for ( IncludeHistory::iterator hi = includeHistory.begin(); hi != includeHistory.end(); hi++ ) { + if ( strcmp( hi->fileName.c_str(), inclFileName ) == 0 && + strcmp( hi->sectionName.c_str(), inclSectionName ) == 0 ) + { + return true; + } + } + return false; +} + + +/* Make a name id in the current name instantiation scope if it is not + * already there. */ +NameInst *ParseData::addNameInst( const InputLoc &loc, std::string data, bool isLabel ) +{ + /* Create the name instantitaion object and insert it. */ + NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel ); + curNameInst->childVect.append( newNameInst ); + if ( !data.empty() ) { + NameMapEl *inDict = 0; + if ( curNameInst->children.insert( data, &inDict ) ) + inDict->value = new NameMapVal; + inDict->value->vals.append( newNameInst ); + } + return newNameInst; +} + +void ParseData::initNameWalk() +{ + curNameInst = rootName; + curNameChild = 0; +} + +void ParseData::initExportsNameWalk() +{ + curNameInst = exportsRootName; + curNameChild = 0; +} + +/* Goes into the next child scope. The number of the child is already set up. + * We need this for the syncronous name tree and parse tree walk to work + * properly. It is reset on entry into a scope and advanced on poping of a + * scope. A call to enterNameScope should be accompanied by a corresponding + * popNameScope. */ +NameFrame ParseData::enterNameScope( bool isLocal, int numScopes ) +{ + /* Save off the current data. */ + NameFrame retFrame; + retFrame.prevNameInst = curNameInst; + retFrame.prevNameChild = curNameChild; + retFrame.prevLocalScope = localNameScope; + + /* Enter into the new name scope. 
*/ + for ( int i = 0; i < numScopes; i++ ) { + curNameInst = curNameInst->childVect[curNameChild]; + curNameChild = 0; + } + + if ( isLocal ) + localNameScope = curNameInst; + + return retFrame; +} + +/* Return from a child scope to a parent. The parent info must be specified as + * an argument and is obtained from the corresponding call to enterNameScope. + * */ +void ParseData::popNameScope( const NameFrame &frame ) +{ + /* Pop the name scope. */ + curNameInst = frame.prevNameInst; + curNameChild = frame.prevNameChild+1; + localNameScope = frame.prevLocalScope; +} + +void ParseData::resetNameScope( const NameFrame &frame ) +{ + /* Pop the name scope. */ + curNameInst = frame.prevNameInst; + curNameChild = frame.prevNameChild; + localNameScope = frame.prevLocalScope; +} + + +void ParseData::unsetObsoleteEntries( FsmAp *graph ) +{ + /* Loop the reference names and increment the usage. Names that are no + * longer needed will be unset in graph. */ + for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) { + /* Get the name. */ + NameInst *name = *ref; + name->numUses += 1; + + /* If the name is no longer needed unset its corresponding entry. */ + if ( name->numUses == name->numRefs ) { + assert( graph->entryPoints.find( name->id ) != 0 ); + graph->unsetEntry( name->id ); + assert( graph->entryPoints.find( name->id ) == 0 ); + } + } +} + +NameSet ParseData::resolvePart( NameInst *refFrom, + const std::string &data, bool recLabelsOnly ) +{ + /* Queue needed for breadth-first search, load it with the start node. */ + NameInstList nameQueue; + nameQueue.append( refFrom ); + + NameSet result; + while ( nameQueue.length() > 0 ) { + /* Pull the next from location off the queue. */ + NameInst *from = nameQueue.detachFirst(); + + /* Look for the name. */ + NameMapEl *el = from->children.find( data ); + if ( el != 0 ) { + /* Record all instances of the name. 
*/ + for ( Vector<NameInst*>::Iter low = el->value->vals; low.lte(); low++ ) + result.insert( *low ); + } + + /* Name not there, do breadth-first operation of appending all + * childrent to the processing queue. */ + for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) { + if ( !recLabelsOnly || (*name)->isLabel ) + nameQueue.append( *name ); + } + } + + /* Queue exhausted and name never found. */ + return result; +} + +void ParseData::resolveFrom( NameSet &result, NameInst *refFrom, + NameRef *nameRef, int namePos ) +{ + /* Look for the name in the owning scope of the factor with aug. */ + NameSet partResult = resolvePart( refFrom, nameRef->data[namePos], false ); + + /* If there are more parts to the name then continue on. */ + if ( ++namePos < nameRef->length() ) { + /* There are more components to the name, search using all the part + * results as the base. */ + for ( NameSet::Iter name = partResult; name.lte(); name++ ) + resolveFrom( result, *name, nameRef, namePos ); + } + else { + /* This is the last component, append the part results to the final + * results. */ + result.insert( partResult ); + } +} + +/* Write out a name reference. */ +ostream &operator<<( ostream &out, const NameRef &nameRef ) +{ + int pos = 0; + if ( nameRef[pos] == "" ) { + out << "::"; + pos += 1; + } + out << nameRef[pos++]; + for ( ; pos < nameRef.length(); pos++ ) + out << "::" << nameRef[pos]; + return out; +} + +ostream &operator<<( ostream &out, const NameInst &nameInst ) +{ + /* Count the number fully qualified name parts. */ + int numParents = 0; + NameInst *curParent = nameInst.parent; + while ( curParent != 0 ) { + numParents += 1; + curParent = curParent->parent; + } + + /* Make an array and fill it in. */ + curParent = nameInst.parent; + NameInst **parents = new NameInst*[numParents]; + for ( int p = numParents-1; p >= 0; p-- ) { + parents[p] = curParent; + curParent = curParent->parent; + } + + /* Write the parents out, skip the root. 
*/ + for ( int p = 1; p < numParents; p++ ) + out << "::" << ( !parents[p]->name.empty() ? parents[p]->name : "<ANON>" ); + + /* Write the name and cleanup. */ + out << "::" << ( !nameInst.name.empty() ? nameInst.name : "<ANON>" ); + delete[] parents; + return out; +} + +struct CmpNameInstLoc +{ + static int compare( const NameInst *ni1, const NameInst *ni2 ) + { + if ( ni1->loc.line < ni2->loc.line ) + return -1; + else if ( ni1->loc.line > ni2->loc.line ) + return 1; + else if ( ni1->loc.col < ni2->loc.col ) + return -1; + else if ( ni1->loc.col > ni2->loc.col ) + return 1; + return 0; + } +}; + +void ParseData::errorStateLabels( const NameSet &resolved ) +{ + MergeSort<NameInst*, CmpNameInstLoc> mergeSort; + mergeSort.sort( resolved.data, resolved.length() ); + for ( NameSet::Iter res = resolved; res.lte(); res++ ) + id->error((*res)->loc) << " -> " << **res << endl; +} + + +NameInst *ParseData::resolveStateRef( NameRef *nameRef, InputLoc &loc, Action *action ) +{ + NameInst *nameInst = 0; + + /* Do the local search if the name is not strictly a root level name + * search. */ + if ( nameRef->data[0] != "" ) { + /* If the action is referenced, resolve all of them. */ + if ( action != 0 && action->embedRoots.length() > 0 ) { + /* Look for the name in all referencing scopes. */ + NameSet resolved; + for ( NameInstVect::Iter actRef = action->embedRoots; actRef.lte(); actRef++ ) + resolveFrom( resolved, *actRef, nameRef, 0 ); + + if ( resolved.length() > 0 ) { + /* Take the first one. */ + nameInst = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. */ + id->error(loc) << "state reference " << *nameRef << + " resolves to multiple entry points" << endl; + errorStateLabels( resolved ); + } + } + } + } + + /* If not found in the local scope, look in global. */ + if ( nameInst == 0 ) { + NameSet resolved; + int fromPos = nameRef->data[0] != "" ? 
0 : 1; + resolveFrom( resolved, rootName, nameRef, fromPos ); + + if ( resolved.length() > 0 ) { + /* Take the first. */ + nameInst = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. */ + id->error(loc) << "state reference " << *nameRef << + " resolves to multiple entry points" << endl; + errorStateLabels( resolved ); + } + } + } + + if ( nameInst == 0 ) { + /* If not found then complain. */ + id->error(loc) << "could not resolve state reference " << *nameRef << endl; + } + return nameInst; +} + +void ParseData::resolveNameRefs( InlineList *inlineList, Action *action ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Entry: case InlineItem::Goto: + case InlineItem::Call: case InlineItem::Ncall: + case InlineItem::Next: { + /* Resolve, pass action for local search. */ + NameInst *target = resolveStateRef( item->nameRef, item->loc, action ); + + /* Name lookup error reporting is handled by resolveStateRef. */ + if ( target != 0 ) { + /* Check if the target goes into a longest match. */ + NameInst *search = target->parent; + while ( search != 0 ) { + if ( search->isLongestMatch ) { + id->error(item->loc) << "cannot enter inside a longest " + "match construction as an entry point" << endl; + break; + } + search = search->parent; + } + + /* Record the reference in the name. This will cause the + * entry point to survive to the end of the graph + * generating walk. */ + target->numRefs += 1; + } + + item->nameTarg = target; + break; + } + default: + break; + } + + /* Some of the item types may have children. */ + if ( item->children != 0 ) + resolveNameRefs( item->children, action ); + } +} + +/* Resolve references to labels in actions. */ +void ParseData::resolveActionNameRefs() +{ + for ( ActionList::Iter act = fsmCtx->actionList; act.lte(); act++ ) { + /* Only care about the actions that are referenced. 
*/ + if ( act->embedRoots.length() > 0 ) + resolveNameRefs( act->inlineList, act ); + } +} + +/* Walk a name tree starting at from and fill the name index. */ +void ParseData::fillNameIndex( NameInst *from ) +{ + /* Fill the value for from in the name index. */ + fsmCtx->nameIndex[from->id] = from; + + /* Recurse on the implicit final state and then all children. */ + if ( from->final != 0 ) + fillNameIndex( from->final ); + for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) + fillNameIndex( *name ); +} + +void ParseData::makeRootNames() +{ + /* Create the root name. */ + rootName = new NameInst( InputLoc(), 0, string(), nextNameId++, false ); + exportsRootName = new NameInst( InputLoc(), 0, string(), nextNameId++, false ); +} + +/* Build the name tree and supporting data structures. */ +void ParseData::makeNameTree( GraphDictEl *dictEl ) +{ + /* Set up curNameInst for the walk. */ + initNameWalk(); + + if ( dictEl != 0 ) { + /* A start location has been specified. */ + dictEl->value->makeNameTree( dictEl->loc, this ); + } + else { + /* First make the name tree. */ + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + /* Recurse on the instance. */ + glel->value->makeNameTree( glel->loc, this ); + } + } + + /* The number of nodes in the tree can now be given by nextNameId */ + fsmCtx->nameIndex = new NameInst*[nextNameId]; + memset( fsmCtx->nameIndex, 0, sizeof(NameInst*)*nextNameId ); + fillNameIndex( rootName ); + fillNameIndex( exportsRootName ); +} + + +void ParseData::createBuiltin( const char *name, BuiltinMachine builtin ) +{ + Expression *expression = new Expression( builtin ); + Join *join = new Join( expression ); + MachineDef *machineDef = new MachineDef( join ); + VarDef *varDef = new VarDef( name, machineDef ); + GraphDictEl *graphDictEl = new GraphDictEl( name, varDef ); + graphDict.insert( graphDictEl ); +} + +/* Initialize the graph dict with builtin types. 
*/ +void ParseData::initGraphDict( ) +{ + createBuiltin( "any", BT_Any ); + createBuiltin( "ascii", BT_Ascii ); + createBuiltin( "extend", BT_Extend ); + createBuiltin( "alpha", BT_Alpha ); + createBuiltin( "digit", BT_Digit ); + createBuiltin( "alnum", BT_Alnum ); + createBuiltin( "lower", BT_Lower ); + createBuiltin( "upper", BT_Upper ); + createBuiltin( "cntrl", BT_Cntrl ); + createBuiltin( "graph", BT_Graph ); + createBuiltin( "print", BT_Print ); + createBuiltin( "punct", BT_Punct ); + createBuiltin( "space", BT_Space ); + createBuiltin( "xdigit", BT_Xdigit ); + createBuiltin( "null", BT_Lambda ); + createBuiltin( "zlen", BT_Lambda ); + createBuiltin( "empty", BT_Empty ); +} + +/* Set the alphabet type. If the types are not valid returns false. */ +bool ParseData::setAlphType( const InputLoc &loc, const HostLang *hostLang, const char *s1, const char *s2 ) +{ + alphTypeLoc = loc; + userAlphType = findAlphType( hostLang, s1, s2 ); + alphTypeSet = true; + return userAlphType != 0; +} + +/* Set the alphabet type. If the types are not valid returns false. 
*/ +bool ParseData::setAlphType( const InputLoc &loc, const HostLang *hostLang, const char *s1 ) +{ + alphTypeLoc = loc; + userAlphType = findAlphType( hostLang, s1 ); + alphTypeSet = true; + return userAlphType != 0; +} + +bool ParseData::setVariable( const char *var, InlineList *inlineList ) +{ + bool set = true; + + if ( strcmp( var, "p" ) == 0 ) + fsmCtx->pExpr = inlineList; + else if ( strcmp( var, "pe" ) == 0 ) + fsmCtx->peExpr = inlineList; + else if ( strcmp( var, "eof" ) == 0 ) + fsmCtx->eofExpr = inlineList; + else if ( strcmp( var, "cs" ) == 0 ) + fsmCtx->csExpr = inlineList; + else if ( strcmp( var, "data" ) == 0 ) + fsmCtx->dataExpr = inlineList; + else if ( strcmp( var, "top" ) == 0 ) + fsmCtx->topExpr = inlineList; + else if ( strcmp( var, "stack" ) == 0 ) + fsmCtx->stackExpr = inlineList; + else if ( strcmp( var, "act" ) == 0 ) + fsmCtx->actExpr = inlineList; + else if ( strcmp( var, "ts" ) == 0 ) + fsmCtx->tokstartExpr = inlineList; + else if ( strcmp( var, "te" ) == 0 ) + fsmCtx->tokendExpr = inlineList; + else + set = false; + + return set; +} + +/* Initialize the key operators object that will be referenced by all fsms + * created. */ +void ParseData::initKeyOps( const HostLang *hostLang ) +{ + /* Signedness and bounds. */ + alphType = alphTypeSet ? userAlphType : &hostLang->hostTypes[hostLang->defaultAlphType]; + fsmCtx->keyOps->setAlphType( hostLang, alphType ); + + if ( lowerNum != 0 ) { + /* If ranges are given then interpret the alphabet type. */ + fsmCtx->keyOps->minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this ); + fsmCtx->keyOps->maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); + } +} + +void ParseData::printNameInst( std::ostream &out, NameInst *nameInst, int level ) +{ + for ( int i = 0; i < level; i++ ) + out << " "; + out << (!nameInst->name.empty() ? 
nameInst->name : "<ANON>") << + " id: " << nameInst->id << + " refs: " << nameInst->numRefs << + " uses: " << nameInst->numUses << endl; + for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ ) + printNameInst( out, *name, level+1 ); +} + +Action *ParseData::newLmCommonAction( const char *name, InlineList *inlineList ) +{ + InputLoc loc; + loc.line = 1; + loc.col = 1; + loc.fileName = "NONE"; + + Action *action = new Action( loc, name, inlineList, fsmCtx->nextCondId++ ); + action->embedRoots.append( rootName ); + fsmCtx->actionList.append( action ); + return action; +} + +void ParseData::initLongestMatchData() +{ + if ( lmList.length() > 0 ) { + /* The initTokStart action resets the token start. */ + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); + il1->head->children = new InlineList; + il1->head->children->append( new InlineItem( InputLoc(), + InlineItem::LmInitTokStart ) ); + initTokStart = newLmCommonAction( "initts", il1 ); + initTokStart->isLmAction = true; + + /* The initActId action gives act a default value. */ + InlineList *il4 = new InlineList; + il4->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); + il4->head->children = new InlineList; + il4->head->children->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) ); + initActId = newLmCommonAction( "initact", il4 ); + initActId->isLmAction = true; + + /* The setTokStart action sets tokstart. */ + InlineList *il5 = new InlineList; + il5->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); + il5->head->children = new InlineList; + il5->head->children->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) ); + setTokStart = newLmCommonAction( "ts", il5 ); + setTokStart->isLmAction = true; + + /* The setTokEnd action sets tokend. 
*/ + InlineList *il3 = new InlineList; + il3->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); + il3->head->children = new InlineList; + il3->head->children->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) ); + setTokEnd = newLmCommonAction( "te", il3 ); + setTokEnd->isLmAction = true; + + /* The action will also need an ordering: ahead of all user action + * embeddings. */ + initTokStartOrd = fsmCtx->curActionOrd++; + initActIdOrd = fsmCtx->curActionOrd++; + setTokStartOrd = fsmCtx->curActionOrd++; + setTokEndOrd = fsmCtx->curActionOrd++; + } +} + +/* After building the graph, do some extra processing to ensure the runtime + * data of the longest mactch operators is consistent. We want tokstart to be + * null when no token match is active. */ +void ParseData::longestMatchInitTweaks( FsmAp *graph ) +{ + if ( lmList.length() > 0 ) { + /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry) + * init the tokstart. */ + for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) { + /* This is run after duplicates are removed, we must guard against + * inserting a duplicate. */ + ActionTable &actionTable = en->value->toStateActionTable; + if ( ! actionTable.hasAction( initTokStart ) ) { + /* We do this after the analysis pass, which reference counts + * the actions. Keep them up to date so we don't break the + * build. */ + initTokStart->numToStateRefs += 1; + actionTable.setAction( initTokStartOrd, initTokStart ); + } + } + + /* Find the set of states that are the target of transitions with + * actions that have calls. These states will be targeted by fret + * statements. 
*/ + StateSet states; + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + for ( ActionTable::Iter ati = trans->tdap()->actionTable; ati.lte(); ati++ ) { + if ( ati->value->anyCall && trans->tdap()->toState != 0 ) + states.insert( trans->tdap()->toState ); + } + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + for ( ActionTable::Iter ati = cond->actionTable; ati.lte(); ati++ ) { + if ( ati->value->anyCall && cond->toState != 0 ) + states.insert( cond->toState ); + } + } + } + } + } + + + /* Init tokstart upon entering the above collected states. */ + for ( StateSet::Iter ps = states; ps.lte(); ps++ ) { + /* This is run after duplicates are removed, we must guard against + * inserting a duplicate. */ + ActionTable &actionTable = (*ps)->toStateActionTable; + if ( ! actionTable.hasAction( initTokStart ) ) { + /* We do this after the analysis pass, which reference counts + * the actions. Keep them up to date so we don't break the + * build. */ + initTokStart->numToStateRefs += 1; + actionTable.setAction( initTokStartOrd, initTokStart ); + } + } + } +} + +/* Always returns the breadth check result. Will not consume the fsm. 
*/ +BreadthResult *ParseData::checkBreadth( FsmAp *fsm ) +{ + double start = 0; + int minDepth = 0; + FsmAp::breadthFromEntry( start, minDepth, id->histogram, fsm, fsm->startState ); + + BreadthResult *breadth = new BreadthResult( start ); + + for ( Vector<ParseData::Cut>::Iter c = cuts; c.lte(); c++ ) { + for ( EntryMap::Iter mel = fsm->entryPoints; mel.lte(); mel++ ) { + if ( mel->key == c->entryId ) { + double cost = 0; + int minDepth = 0; + FsmAp::breadthFromEntry( cost, minDepth, id->histogram, fsm, mel->value ); + + breadth->costs.append( BreadthCost( c->name, cost ) ); + } + } + } + + return breadth; +} + + +static void resultWrite( ostream &out, long code, long id, const char *scode ) +{ + out << code << " " << id << " " << scode << endl; +} + +void ParseData::analysisResult( long code, long _id, const char *scode ) +{ + stringstream out; + resultWrite( out, code, _id, scode ); + id->comm = out.str(); +} + +void ParseData::reportBreadthResults( BreadthResult *breadth ) +{ + stringstream out; + + out << std::fixed << std::setprecision(10); + + out << "COST START " << + ( breadth->start ) << " " << + ( 1 ) << endl; + + for ( Vector<BreadthCost>::Iter c = breadth->costs; c.lte(); c++ ) { + out << "COST " << c->name << " " << + ( breadth->start ) << " " << + ( ( c->cost / breadth->start ) ) << endl; + } + + this->id->comm += out.str(); +} + +void ParseData::reportAnalysisResult( FsmRes &res ) +{ + if ( res.type == FsmRes::TypeTooManyStates ) + analysisResult( 1, 0, "too-many-states" ); + + else if ( res.type == FsmRes::TypeCondCostTooHigh ) + analysisResult( 20, res.id, "cond-cost" ); + + else if ( res.type == FsmRes::TypePriorInteraction ) + analysisResult( 60, res.id, "prior-interaction" ); +} + + +/* Make the graph from a graph dict node. Does minimization and state sorting. 
*/
FsmRes ParseData::makeInstance( GraphDictEl *gdNode )
{
	if ( id->printStatistics )
		id->stats() << "compiling\t" << sectionName << endl;

	/* Apply the user's state limit only for the duration of the walk. */
	if ( id->stateLimit > 0 )
		fsmCtx->stateLimit = id->stateLimit;

	/* Build the graph from a walk of the parse tree. */
	FsmRes graph = gdNode->value->walk( this );

	if ( id->stateLimit > 0 )
		fsmCtx->stateLimit = FsmCtx::STATE_UNLIMITED;

	/* Perform the breadth computation. This does not affect the FSM result. We
	 * compute and print and move on. Higher up we catch the checkBreadth flag
	 * and stop output. */
	if ( graph.success() && id->checkBreadth ) {
		BreadthResult *breadth = checkBreadth( graph.fsm );
		reportBreadthResults( breadth );

		/* checkBreadth allocates the result with new and the report only
		 * reads it, so it is released here. */
		delete breadth;
	}

	if ( id->condsCheckDepth >= 0 ) {
		/* Use this to expand generalized repetition to past the nfa union
		 * choice point. */
		fsmCtx->condsCheckDepth = id->condsCheckDepth;

		/* Only search a graph that was actually built. graph.fsm is treated
		 * as valid only after success(), as in the breadth check above. */
		if ( graph.success() )
			graph = FsmAp::condCostSearch( graph.fsm );
	}

	if ( !graph.success() ) {
		reportAnalysisResult( graph );
		return graph;
	}

	fsmCtx->finalizeInstance( graph.fsm );

	return graph;
}

/* Dump the name tree below rootName, then the name index, to out. */
void ParseData::printNameTree( ostream &out )
{
	/* Print the name instance map. */
	for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ )
		printNameInst( out, *name, 0 );

	out << "name index:" << endl;
	/* Show that the name index is correct. */
	for ( int ni = 0; ni < nextNameId; ni++ ) {
		out << ni << ": ";
		std::string name = fsmCtx->nameIndex[ni]->name;
		out << ( !name.empty() ? name : "<ANON>" ) << endl;
	}
}

/* Build only the machine named by gdNode. Returns the result of
 * makeInstance for that node. */
FsmRes ParseData::makeSpecific( GraphDictEl *gdNode )
{
	/* Build the name tree and supporting data structures. */
	makeNameTree( gdNode );

	/* Resolve name references from gdNode. */
	initNameWalk();
	gdNode->value->resolveNameRefs( this );

	/* Do not resolve action references. Since we are not building the entire
	 * graph there's a good chance that many name references will fail. This
	 * is okay since generating part of the graph is usually only done when
	 * inspecting the compiled machine. */

	/* Same story for extern entry point references. */

	/* Flag this case so that the XML code generator is aware that we haven't
	 * looked up name references in actions. It can then avoid segfaulting. */
	fsmCtx->generatingSectionSubset = true;

	/* Just building the specified graph. */
	initNameWalk();
	FsmRes mainGraph = makeInstance( gdNode );

	return mainGraph;
}

/* Build every instantiation and merge them into a single graph. On the first
 * failing instance all graphs built so far are released and that failure is
 * returned. */
FsmRes ParseData::makeAll()
{
	/* Build the name tree and supporting data structures. */
	makeNameTree( 0 );

	/* Resolve name references in the tree. */
	initNameWalk();
	for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
		glel->value->resolveNameRefs( this );

	/* Resolve action code name references. */
	resolveActionNameRefs();

	/* Force name references to the top level instantiations. */
	for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ )
		(*inst)->numRefs += 1;

	FsmAp *mainGraph = 0;
	FsmAp **graphs = new FsmAp*[instanceList.length()];
	int numOthers = 0;

	/* Make all the instantiations. Main is expected to be in this list. */
	initNameWalk();
	for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
		FsmRes res = makeInstance( glel );
		if ( !res.success() ) {
			/* Release everything built so far, including a main graph that
			 * was instantiated before the failing instance. */
			for ( int i = 0; i < numOthers; i++ )
				delete graphs[i];
			delete[] graphs;
			delete mainGraph;
			return res;
		}

		/* Main graph is always instantiated. */
		if ( glel->key == MAIN_MACHINE )
			mainGraph = res.fsm;
		else
			graphs[numOthers++] = res.fsm;
	}

	if ( mainGraph == 0 ) {
		/* With no instances at all there is nothing to promote; indexing
		 * graphs[-1] would be out of bounds. */
		if ( numOthers == 0 ) {
			delete[] graphs;
			return FsmRes( FsmRes::InternalError() );
		}

		/* No main machine: use the last instantiation in its place. */
		mainGraph = graphs[--numOthers];
	}

	if ( numOthers > 0 ) {
		/* Add all the other graphs into main. */
		mainGraph->globOp( graphs, numOthers );
	}

	delete[] graphs;
	return FsmRes( FsmRes::Fsm(), mainGraph );
}


/* Build the name tree for exported definitions only. */
void ParseData::makeExportsNameTree()
{
	/* Make a name tree for the exports. */
	initExportsNameWalk();

	/* First make the name tree. */
	for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
		if ( gdel->value->isExport ) {
			/* Recurse on the instance. */
			gdel->value->makeNameTree( gdel->loc, this );
		}
	}
}

/* Walk every exported definition, check that it is a single-character
 * machine and append its key to fsmCtx->exportList. */
void ParseData::makeExports()
{
	makeExportsNameTree();

	/* Resolve name references in the tree. */
	initExportsNameWalk();
	for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
		if ( gdel->value->isExport )
			gdel->value->resolveNameRefs( this );
	}

	/* Build each exported definition in turn. */
	initExportsNameWalk();
	for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) {
		/* Check if this var def is an export. */
		if ( !gdel->value->isExport )
			continue;

		/* Build the graph from a walk of the parse tree. */
		FsmRes graph = gdel->value->walk( this );
		if ( !graph.success() ) {
			/* No usable graph; record the failure rather than touching
			 * graph.fsm. */
			reportAnalysisResult( graph );
			continue;
		}

		/* An export must be a machine that matches a single character. */
		if ( !graph.fsm->checkSingleCharMachine() ) {
			id->error(gdel->loc) << "bad export machine, must define "
					"a single character" << endl;
		}
		else {
			/* Safe to extract the key and declare the export. */
			Key exportKey = graph.fsm->startState->outList.head->lowKey;
			fsmCtx->exportList.append( new Export( gdel->value->name, exportKey ) );
		}

		/* The graph was built only to read the key, which has been copied. */
		delete graph.fsm;
	}
}

/* Build the section graph (all instances when graphDictEl is 0, otherwise
 * just that definition), then run graph analysis, the longest-match tweaks
 * and reduction preparation. Returns the failing result if the build fails,
 * or an internal error if errors were reported on the input. */
FsmRes ParseData::prepareMachineGen( GraphDictEl *graphDictEl, const HostLang *hostLang )
{
	initKeyOps( hostLang );
	makeRootNames();
	initLongestMatchData();

	/* Make the graph, do minimization. */
	FsmRes res = ( graphDictEl == 0 ) ? makeAll() : makeSpecific( graphDictEl );
	if ( !res.success() )
		return res;
	sectionGraph = res.fsm;

	/* If any errors have occurred in the input file then don't write anything. */
	if ( id->errorCount > 0 )
		return FsmRes( FsmRes::InternalError() );

	fsmCtx->analyzeGraph( sectionGraph );

	/* Depends on the graph analysis. */
	longestMatchInitTweaks( sectionGraph );

	fsmCtx->prepareReduction( sectionGraph );

	return FsmRes( FsmRes::Fsm(), sectionGraph );
}

/* Create the reducer and the host-language code generator for the section
 * graph, store the generator in cgd and run its analysis step. */
void ParseData::generateReduced( const char *inputFileName, CodeStyle codeStyle,
		std::ostream &out, const HostLang *hostLang )
{
	Reducer *red = new Reducer( this->id, fsmCtx, sectionGraph, sectionName, machineId );
	red->make( hostLang, alphType );

	CodeGenArgs args( this->id, red, alphType, machineId, inputFileName, sectionName, out, codeStyle );

	args.lineDirectives = !id->noLineDirectives;
	args.forceVar = id->forceVar;
	args.loopLabels = hostLang->loopLabels;

	/* Make the code generator for the host language. */
	cgd = (*hostLang->makeCodeGen)( hostLang, args );

	/* Code generation analysis step. */
	cgd->genAnalysis();
}

#if 0
void ParseData::generateXML( ostream &out )
{
	/* Make the generator. */
	XMLCodeGen codeGen( sectionName, machineId, id, this, sectionGraph, out );

	/* Write out with it. */
	codeGen.writeXML();
}
#endif

/* Release the code generator's data, the section graph, the graph
 * dictionary and the action list. */
void ParseData::clear()
{
	cgd->clear();

	delete sectionGraph;
	sectionGraph = 0;

	graphDict.empty();

	/* Delete all the nodes in the action list. Will cause all the
	 * string data that represents the actions to be deallocated. */
	fsmCtx->actionList.empty();
}
diff --git a/ragel/parsedata.h b/ragel/parsedata.h new file mode 100644 index 00000000..d45de5a6 --- /dev/null +++ b/ragel/parsedata.h @@ -0,0 +1,429 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _PARSEDATA_H +#define _PARSEDATA_H + +#include <iostream> +#include <limits.h> +#include <sstream> +#include <vector> +#include <set> + +#include "avlmap.h" +#include "bstmap.h" +#include "vector.h" +#include "dlist.h" +#include "fsmgraph.h" +#include "compare.h" +#include "vector.h" +#include "common.h" +#include "parsetree.h" +#include "action.h" + + +/* Forwards. 
*/ +using std::ostream; + +struct VarDef; +struct Join; +struct Expression; +struct Term; +struct FactorWithAug; +struct FactorWithLabel; +struct FactorWithRep; +struct FactorWithNeg; +struct Factor; +struct Literal; +struct Range; +struct RegExpr; +struct ReItem; +struct ReOrBlock; +struct ReOrItem; +struct LongestMatch; +struct CodeGenData; +struct InputData; +struct InputItem; + +typedef DList<LongestMatch> LmList; + +/* This is used for tracking the include files/machine pairs. */ +struct IncludeHistoryItem +{ + IncludeHistoryItem( const char *fileName, const char *sectionName ) + : fileName(fileName), sectionName(sectionName) {} + + std::string fileName; + std::string sectionName; +}; + +typedef std::vector<IncludeHistoryItem> IncludeHistory; + +/* Graph dictionary. */ +struct GraphDictEl +: + public AvlTreeEl<GraphDictEl>, + public DListEl<GraphDictEl> +{ + GraphDictEl( std::string k ) + : key(k), value(0), isInstance(false) { } + GraphDictEl( std::string k, VarDef *value ) + : key(k), value(value), isInstance(false) { } + + ~GraphDictEl() + { + delete value; + } + + std::string getKey() { return key; } + + std::string key; + VarDef *value; + bool isInstance; + + /* Location info of graph definition. Points to variable name of assignment. */ + InputLoc loc; +}; + +typedef AvlTree<GraphDictEl, std::string, CmpString> GraphDict; +typedef DList<GraphDictEl> GraphList; + +/* Priority name dictionary. */ +typedef AvlMapEl<std::string, int> PriorDictEl; +typedef AvlMap<std::string, int, CmpString> PriorDict; + +/* Local error name dictionary. */ +typedef AvlMapEl<std::string, int> LocalErrDictEl; +typedef AvlMap<std::string, int, CmpString> LocalErrDict; + +struct NameMapVal +{ + Vector<NameInst*> vals; +}; + +/* Tree of instantiated names. 
*/ +typedef AvlMapEl<std::string, NameMapVal*> NameMapEl; +typedef AvlMap<std::string, NameMapVal*, CmpString> NameMap; +typedef Vector<NameInst*> NameVect; +typedef BstSet<NameInst*> NameSet; + +/* Node in the tree of instantiated names. */ +struct NameInst +{ + NameInst( const InputLoc &loc, NameInst *parent, std::string name, int id, bool isLabel ) : + loc(loc), parent(parent), name(name), id(id), isLabel(isLabel), + isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {} + + ~NameInst(); + + InputLoc loc; + + /* Keep parent pointers in the name tree to retrieve + * fully qulified names. */ + NameInst *parent; + + std::string name; + int id; + bool isLabel; + bool isLongestMatch; + + int numRefs; + int numUses; + + /* Names underneath us, excludes anonymous names. */ + NameMap children; + + /* All names underneath us in order of appearance. */ + NameVect childVect; + + /* Join scopes need an implicit "final" target. */ + NameInst *start, *final; + + /* During a fsm generation walk, lists the names that are referenced by + * epsilon operations in the current scope. After the link is made by the + * epsilon reference and the join operation is complete, the label can + * have its refcount decremented. Once there are no more references the + * entry point can be removed from the fsm returned. */ + NameVect referencedNames; + + /* Pointers for the name search queue. */ + NameInst *prev, *next; + + /* Check if this name inst or any name inst below is referenced. */ + bool anyRefsRec(); +}; + +typedef DList<NameInst> NameInstList; + +/* Stack frame used in walking the name tree. 
*/ +struct NameFrame +{ + NameInst *prevNameInst; + int prevNameChild; + NameInst *prevLocalScope; +}; + +struct LengthDef +{ + LengthDef( char *name ) + : name(name) {} + + char *name; + LengthDef *prev, *next; +}; + +typedef DList<LengthDef> LengthDefList; + +extern const int ORD_PUSH; +extern const int ORD_RESTORE; +extern const int ORD_COND; +extern const int ORD_COND2; +extern const int ORD_TEST; + +/* Class to collect information about the machine during the + * parse of input. */ +struct ParseData +{ + /* Create a new parse data object. This is done at the beginning of every + * fsm specification. */ + ParseData( InputData *id, std::string sectionName, + int machineId, const InputLoc §ionLoc, const HostLang *hostLang, + MinimizeLevel minimizeLevel, MinimizeOpt minimizeOpt ); + ~ParseData(); + + /* + * Setting up the graph dict. + */ + + /* Initialize a graph dict with the basic fsms. */ + void initGraphDict(); + void createBuiltin( const char *name, BuiltinMachine builtin ); + + /* Make a name id in the current name instantiation scope if it is not + * already there. */ + NameInst *addNameInst( const InputLoc &loc, std::string data, bool isLabel ); + void makeRootNames(); + void makeNameTree( GraphDictEl *gdNode ); + void makeExportsNameTree(); + void fillNameIndex( NameInst *from ); + + /* Increments the usage count on entry names. Names that are no longer + * needed will have their entry points unset. */ + void unsetObsoleteEntries( FsmAp *graph ); + + /* Resove name references in action code and epsilon transitions. */ + NameSet resolvePart( NameInst *refFrom, const std::string &data, bool recLabelsOnly ); + void resolveFrom( NameSet &result, NameInst *refFrom, + NameRef *nameRef, int namePos ); + NameInst *resolveStateRef( NameRef *nameRef, InputLoc &loc, Action *action ); + void resolveNameRefs( InlineList *inlineList, Action *action ); + void resolveActionNameRefs(); + + /* Set the alphabet type. If type types are not valid returns false. 
*/ + bool setAlphType( const InputLoc &loc, const HostLang *hostLang, + const char *s1 ); + bool setAlphType( const InputLoc &loc, const HostLang *hostLang, + const char *s1, const char *s2 ); + + /* Override one of the variables ragel uses. */ + bool setVariable( const char *var, InlineList *inlineList ); + + /* Dumping the name instantiation tree. */ + void printNameInst( std::ostream &out, NameInst *nameInst, int level ); + void printNameTree( std::ostream &out ); + + void analysisResult( long code, long id, const char *scode ); + + void reportBreadthResults( BreadthResult *breadth ); + BreadthResult *checkBreadth( FsmAp *fsm ); + void reportAnalysisResult( FsmRes &res ); + + /* Make the graph from a graph dict node. Does minimization. */ + FsmRes makeInstance( GraphDictEl *gdNode ); + FsmRes makeSpecific( GraphDictEl *gdNode ); + FsmRes makeAll(); + + void makeExports(); + + FsmRes prepareMachineGen( GraphDictEl *graphDictEl, const HostLang *hostLang ); + void generateXML( ostream &out ); + void generateReduced( const char *inputFileName, CodeStyle codeStyle, + std::ostream &out, const HostLang *hostLang ); + + std::string sectionName; + FsmAp *sectionGraph; + + void initKeyOps( const HostLang *hostLang ); + + void errorStateLabels( const NameSet &resolved ); + + /* + * Data collected during the parse. + */ + + /* Dictionary of graphs. Both instances and non-instances go here. */ + GraphDict graphDict; + + /* The list of instances. */ + GraphList instanceList; + + /* Dictionary of actions. Lets actions be defined and then referenced. */ + ActionDict actionDict; + + /* Dictionary of named priorities. */ + PriorDict priorDict; + + /* Dictionary of named local errors. */ + LocalErrDict localErrDict; + + /* Various next identifiers. */ + int nextLocalErrKey, nextNameId; + + /* The default priority number key for a machine. This is active during + * the parse of the rhs of a machine assignment. 
*/ + int curDefPriorKey; + + int curDefLocalErrKey; + + /* Alphabet type. */ + HostType *alphType; + HostType *userAlphType; + bool alphTypeSet; + InputLoc alphTypeLoc; + + /* The alphabet range. */ + char *lowerNum, *upperNum; + Key lowKey, highKey; + InputLoc rangeLowLoc, rangeHighLoc; + + InputData *id; + + /* The name of the file the fsm is from, and the spec name. */ + int machineId; + InputLoc sectionLoc; + + /* Root of the name tree. One root is for the instantiated machines. The + * other root is for exported definitions. */ + NameInst *rootName; + NameInst *exportsRootName; + + /* Name tree walking. */ + NameInst *curNameInst; + int curNameChild; + + /* The place where resolved epsilon transitions go. These cannot go into + * the parse tree because a single epsilon op can resolve more than once + * to different nameInsts if the machine it's in is used more than once. */ + NameVect epsilonResolvedLinks; + int nextEpsilonResolvedLink; + + /* Root of the name tree used for doing local name searches. */ + NameInst *localNameScope; + + void setLmInRetLoc( InlineList *inlineList ); + void initLongestMatchData(); + void longestMatchInitTweaks( FsmAp *graph ); + void initNameWalk(); + void initExportsNameWalk(); + NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; } + NameFrame enterNameScope( bool isLocal, int numScopes ); + void popNameScope( const NameFrame &frame ); + void resetNameScope( const NameFrame &frame ); + + void nfaTermCheckKleeneZero(); + void nfaTermCheckMinZero(); + void nfaTermCheckPlusZero(); + void nfaTermCheckRepZero(); + void nfaTermCheckZeroReps(); + + void clear(); + + /* Counter for assigning ids to longest match items. */ + int nextLongestMatchId; + + int nextRepId; + + /* List of all longest match parse tree items. 
*/ + LmList lmList; + + Action *newLmCommonAction( const char *name, InlineList *inlineList ); + + Action *initTokStart; + int initTokStartOrd; + + Action *setTokStart; + int setTokStartOrd; + + Action *initActId; + int initActIdOrd; + + Action *setTokEnd; + int setTokEndOrd; + + LengthDefList lengthDefList; + + CodeGenData *cgd; + + struct Cut + { + Cut( std::string name, int entryId ) + : name(name), entryId(entryId) {} + + std::string name; + int entryId; + }; + + /* Track the cuts we set in the fsm graph. We perform cost analysis on the + * built fsm graph for each of these entry points. */ + Vector<Cut> cuts; + + ParseData *prev, *next; + + FsmCtx *fsmCtx; + + /* Make a list of places to look for an included file. */ + bool duplicateInclude( const char *inclFileName, const char *inclSectionName ); + + IncludeHistory includeHistory; + + std::set<std::string> actionParams; +}; + +Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd ); +Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ); +Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd ); +Key makeFsmKeyChar( char c, ParseData *pd ); +void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd ); +void makeFsmUniqueKeyArray( KeySet &result, const char *data, int len, + bool caseInsensitive, ParseData *pd ); +FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd ); +FsmAp *dotFsm( ParseData *pd ); +FsmAp *dotStarFsm( ParseData *pd ); + +Key *prepareHexString( ParseData *pd, const InputLoc &loc, + const char *data, long length, long &resLen ); +char *prepareLitString( InputData *id, const InputLoc &loc, const char *data, long length, + long &resLen, bool &caseInsensitive ); +const char *checkLitOptions( InputData *id, const InputLoc &loc, + const char *data, int length, bool &caseInsensitive ); + +#endif diff --git a/ragel/parsetree.cc b/ragel/parsetree.cc new file mode 100644 index 00000000..f35a6299 --- /dev/null +++ b/ragel/parsetree.cc @@ -0,0 +1,2199 
@@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <iostream> +#include <iomanip> +#include <sstream> +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <inputdata.h> + +/* Parsing. */ +#include "ragel.h" +#include "parsetree.h" +#include "parsedata.h" + +using namespace std; +ostream &operator<<( ostream &out, const NameRef &nameRef ); +ostream &operator<<( ostream &out, const NameInst &nameInst ); + +/* Read string literal (and regex) options and return the true end. 
*/ +const char *checkLitOptions( InputData *id, const InputLoc &loc, + const char *data, int length, bool &caseInsensitive ) +{ + const char *end = data + length - 1; + while ( *end != '\'' && *end != '\"' && *end != '/' ) { + if ( *end == 'i' ) + caseInsensitive = true; + else { + id->error( loc ) << "literal string '" << *end << + "' option not supported" << endl; + } + end -= 1; + } + return end; +} + +/* Convert the literal string which comes in from the scanner into an array of + * characters with escapes and options interpreted. Also null terminates the + * string. Though this null termination should not be relied on for + * interpreting literals in the parser because the string may contain \0 */ +char *prepareLitString( InputData *id, const InputLoc &loc, const char *data, long length, + long &resLen, bool &caseInsensitive ) +{ + char *resData = new char[length+1]; + caseInsensitive = false; + + const char *src = data + 1; + const char *end = checkLitOptions( id, loc, data, length, caseInsensitive ); + + char *dest = resData; + long dlen = 0; + while ( src != end ) { + if ( *src == '\\' ) { + switch ( src[1] ) { + case '0': dest[dlen++] = '\0'; break; + case 'a': dest[dlen++] = '\a'; break; + case 'b': dest[dlen++] = '\b'; break; + case 't': dest[dlen++] = '\t'; break; + case 'n': dest[dlen++] = '\n'; break; + case 'v': dest[dlen++] = '\v'; break; + case 'f': dest[dlen++] = '\f'; break; + case 'r': dest[dlen++] = '\r'; break; + case '\n': break; + default: dest[dlen++] = src[1]; break; + } + src += 2; + } + else { + dest[dlen++] = *src++; + } + } + + resLen = dlen; + resData[resLen] = 0; + return resData; +} + +Key *prepareHexString( ParseData *pd, const InputLoc &loc, + const char *data, long length, long &resLen ) +{ + Key *dest = new Key[( length - 2 ) >> 1]; + const char *src = data; + const char *end = data + length; + long dlen = 0; + char s[3]; + + /* Scan forward over 0x. 
*/ + src += 2; + + s[2] = 0; + while ( src < end ) { + s[0] = src[0]; + s[1] = src[1]; + + dest[dlen++] = makeFsmKeyHex( s, loc, pd ); + + /* Scan forward over the hex chars, then any whitespace or . characters. */ + src += 2; + while ( *src == ' ' || *src == '\t' || *src == '\n' || *src == '.' ) + src += 1; + + /* Scan forward over 0x. */ + src += 2; + } + + resLen = dlen; + return dest; +} + +FsmRes VarDef::walk( ParseData *pd ) +{ + /* We enter into a new name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Recurse on the expression. */ + FsmRes rtnVal = machineDef->walk( pd ); + if ( !rtnVal.success() ) + return rtnVal; + + /* Do the tranfer of local error actions. */ + LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name ); + if ( localErrDictEl != 0 ) { + for ( StateList::Iter state = rtnVal.fsm->stateList; state.lte(); state++ ) + rtnVal.fsm->transferErrorActions( state, localErrDictEl->value ); + } + + /* If the expression below is a join operation with multiple expressions + * then it just had epsilon transisions resolved. If it is a join + * with only a single expression then run the epsilon op now. */ + if ( machineDef->type == MachineDef::JoinType && + machineDef->join->exprList.length() == 1 ) + { + rtnVal = FsmAp::epsilonOp( rtnVal.fsm ); + if ( !rtnVal.success() ) + return rtnVal; + } + + /* We can now unset entry points that are not longer used. */ + pd->unsetObsoleteEntries( rtnVal.fsm ); + + /* If the name of the variable is referenced then add the entry point to + * the graph. */ + if ( pd->curNameInst->numRefs > 0 ) + rtnVal.fsm->setEntry( pd->curNameInst->id, rtnVal.fsm->startState ); + + /* Pop the name scope. */ + pd->popNameScope( nameFrame ); + return rtnVal; +} + +void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd ) +{ + /* The variable definition enters a new scope. 
*/ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, name, false ); + + if ( machineDef->type == MachineDef::LongestMatchType ) + pd->curNameInst->isLongestMatch = true; + + /* Recurse. */ + machineDef->makeNameTree( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; +} + +void VarDef::resolveNameRefs( ParseData *pd ) +{ + /* Entering into a new scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Recurse. */ + machineDef->resolveNameRefs( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->popNameScope( nameFrame ); +} + +VarDef::~VarDef() +{ + delete machineDef; +} + +InputLoc LongestMatchPart::getLoc() +{ + return action != 0 ? action->loc : semiLoc; +} + +/* + * If there are any LMs then all of the following entry points must reset + * tokstart: + * + * 1. fentry(StateRef) + * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef) + * 3. targt of any transition that has an fcall (the return loc). + * 4. start state of all longest match routines. + */ + +Action *LongestMatch::newLmAction( ParseData *pd, const InputLoc &loc, + const char *name, InlineList *inlineList ) +{ + Action *action = new Action( loc, name, inlineList, pd->fsmCtx->nextCondId++ ); + action->embedRoots.append( pd->curNameInst ); + pd->fsmCtx->actionList.append( action ); + action->isLmAction = true; + return action; +} + +void LongestMatch::makeActions( ParseData *pd ) +{ + /* Make actions that set the action id. */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. 
*/ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); + inlineList->head->children = new InlineList; + inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmSetActId ) ); + char *actName = new char[50]; + sprintf( actName, "store%i", lmi->longestMatchId ); + lmi->setActId = newLmAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart on the last + * character. */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); + inlineList->head->children = new InlineList; + inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnLast ) ); + char *actName = new char[50]; + sprintf( actName, "last%i", lmi->longestMatchId ); + lmi->actOnLast = newLmAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart on the next + * character. These actions will set tokend themselves (it is the current + * char). */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. 
*/ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); + inlineList->head->children = new InlineList; + inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnNext ) ); + char *actName = new char[50]; + sprintf( actName, "next%i", lmi->longestMatchId ); + lmi->actOnNext = newLmAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart at tokend. These + * actions execute some time after matching the last char. */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); + inlineList->head->children = new InlineList; + inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnLagBehind ) ); + char *actName = new char[50]; + sprintf( actName, "lag%i", lmi->longestMatchId ); + lmi->actLagBehind = newLmAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* + * NFA actions + * + * Actions that execute the user action and restart on the next character. + * These actions will set tokend themselves (it is the current char). They + * also reset the nfa machinery used to choose between tokens. + */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. 
*/ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); + inlineList->head->children = new InlineList; + inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmNfaOnLast ) ); + char *actName = new char[50]; + sprintf( actName, "nlast%i", lmi->longestMatchId ); + lmi->actNfaOnLast = newLmAction( pd, lmi->getLoc(), actName, inlineList ); + } + + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); + inlineList->head->children = new InlineList; + inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmNfaOnNext ) ); + char *actName = new char[50]; + sprintf( actName, "nnext%i", lmi->longestMatchId ); + lmi->actNfaOnNext = newLmAction( pd, lmi->getLoc(), actName, inlineList ); + } + + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); + inlineList->head->children = new InlineList; + inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmNfaOnEof ) ); + char *actName = new char[50]; + sprintf( actName, "neof%i", lmi->longestMatchId ); + lmi->actNfaOnEof = newLmAction( pd, lmi->getLoc(), actName, inlineList ); + } + + InputLoc loc; + loc.line = 1; + loc.col = 1; + loc.fileName = "NONE"; + + /* Create the error action. 
*/ + InlineList *il6 = new InlineList; + il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) ); + lmActSelect = newLmAction( pd, loc, "switch", il6 ); +} + +void LongestMatch::findName( ParseData *pd ) +{ + NameInst *nameInst = pd->curNameInst; + while ( nameInst->name.empty() ) { + nameInst = nameInst->parent; + /* Since every machine must must have a name, we should always find a + * name for the longest match. */ + assert( nameInst != 0 ); + } + name = nameInst->name; +} + +void LongestMatch::makeNameTree( ParseData *pd ) +{ + /* Create an anonymous scope for the longest match. Will be used for + * restarting machine after matching a token. */ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, std::string(), false ); + + /* Recurse into all parts of the longest match operator. */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) + lmi->join->makeNameTree( pd ); + + /* Traverse the name tree upwards to find a name for this lm. */ + findName( pd ); + + /* Also make the longest match's actions at this point. */ + makeActions( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; +} + +void LongestMatch::resolveNameRefs( ParseData *pd ) +{ + /* The longest match gets its own name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Take an action reference for each longest match item and recurse. */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* Record the reference if the item has an action. */ + if ( lmi->action != 0 ) + lmi->action->embedRoots.append( pd->localNameScope ); + + /* Recurse down the join. */ + lmi->join->resolveNameRefs( pd ); + } + + /* The name scope ends, pop the name instantiation. 
*/
+	pd->popNameScope( nameFrame );
+}
+
+/* Redirect a plain transition back to the start state: detach it from its
+ * current target and attach it again, from the same source state, to
+ * graph->startState. */
+void LongestMatch::restart( FsmAp *graph, TransAp *trans )
+{
+	StateAp *fromState = trans->tdap()->fromState;
+	graph->detachTrans( fromState, trans->tdap()->toState, trans->tdap() );
+	graph->attachTrans( fromState, graph->startState, trans->tdap() );
+}
+
+/* Same redirection as above, for a condition transition. */
+void LongestMatch::restart( FsmAp *graph, CondAp *cti )
+{
+	StateAp *fromState = cti->fromState;
+	graph->detachTrans( fromState, cti->toState, cti );
+	graph->attachTrans( fromState, graph->startState, cti );
+}
+
+/* For every state of the graph that carries out (leaving) actions, hand that
+ * action table to setErrorActions for the same state. */
+void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
+{
+	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+		if ( st->outActionTable.length() > 0 )
+			graph->setErrorActions( st, st->outActionTable );
+	}
+}
+
+/* Build the scanner the classic way: walk every part, tag it with its
+ * longest-match action, union all parts together and run the longest match
+ * processing on the result. Returns the failing FsmRes as soon as any step
+ * fails. */
+FsmRes LongestMatch::walkClassic( ParseData *pd )
+{
+	/* The longest match has its own name scope. */
+	NameFrame nameFrame = pd->enterNameScope( true, 1 );
+
+	/* Make each part of the longest match. */
+	FsmAp **parts = new FsmAp*[longestMatchList->length()];
+	LmPartList::Iter lmi = *longestMatchList;
+	for ( int i = 0; lmi.lte(); lmi++, i++ ) {
+		/* Create the machine and embed the setting of the longest match id. */
+		FsmRes res = lmi->join->walk( pd );
+		if ( !res.success() ) {
+			/* Nothing has been unioned yet, so the machines built so far
+			 * and the pointer array are still ours to release. */
+			for ( int p = 0; p < i; p++ )
+				delete parts[p];
+			delete[] parts;
+			return res;
+		}
+
+		parts[i] = res.fsm;
+		parts[i]->longMatchAction( pd->fsmCtx->curActionOrd++, lmi );
+	}
+
+	/* Before we union the patterns we need to deal with leaving actions. They
+	 * are transferred to error transitions out of the final states (like local
+	 * error actions) and to eof actions. In the scanner we need to forbid
+	 * on_last for any final state that has a leaving action. */
+	for ( int i = 0; i < longestMatchList->length(); i++ )
+		transferScannerLeavingActions( parts[i] );
+
+	/* Union machines one and up with machine zero. The grammar dictates that
+	 * there will always be at least one part. */
+	FsmRes res( FsmRes::Fsm(), parts[0] );
+	for ( int i = 1; i < longestMatchList->length(); i++ ) {
+		res = FsmAp::unionOp( res.fsm, parts[i] );
+		if ( !res.success() ) {
+			/* Release only the pointer array. NOTE(review): what unionOp
+			 * does with the machines it was given when it fails is not
+			 * visible here, so they are deliberately left alone. */
+			delete[] parts;
+			return res;
+		}
+	}
+
+	runLongestMatch( pd, res.fsm );
+
+	/* Pop the name scope. */
+	pd->popNameScope( nameFrame );
+
+	delete[] parts;
+	return res;
+}
+
+
+/* Dispatch to the NFA-based or the classic construction, depending on the
+ * nfaConstruction flag. */
+FsmRes LongestMatch::walk( ParseData *pd )
+{
+	if ( nfaConstruction )
+		return walkNfa( pd );
+	else
+		return walkClassic( pd );
+}
+
+/* Delete every term and the rounds list (if one was set). */
+NfaUnion::~NfaUnion()
+{
+	for ( TermVect::Iter term = terms; term.lte(); term++ )
+		delete *term;
+	if ( roundsList != 0 )
+		delete roundsList;
+}
+
+/* Walk every term into its own machine and pass the collected machines to
+ * FsmAp::nfaUnion. If a term fails, the machines built before it are
+ * released and the failing result is returned. */
+FsmRes NfaUnion::walk( ParseData *pd )
+{
+	if ( pd->id->printStatistics )
+		pd->id->stats() << "nfa union terms\t" << terms.length() << endl;
+
+	/* Compute the individual expressions. */
+	long numMachines = 0;
+	FsmAp **machines = new FsmAp*[terms.length()];
+	for ( TermVect::Iter term = terms; term.lte(); term++ ) {
+		FsmRes res = (*term)->walk( pd );
+		if ( !res.success() ) {
+			/* Delete previous. The index must advance: without the
+			 * increment the loop deletes machines[0] over and over. */
+			for ( long m = 0; m < numMachines; m++ )
+				delete machines[m];
+			delete[] machines;
+			return res;
+		}
+
+		machines[numMachines++] = res.fsm;
+	}
+
+	std::ostream &stats = pd->id->stats();
+	bool printStatistics = pd->id->printStatistics;
+
+	return FsmAp::nfaUnion( *roundsList, machines, numMachines, stats, printStatistics );
+}
+
+/* Recurse into every term to build its part of the name tree. */
+void NfaUnion::makeNameTree( ParseData *pd )
+{
+	for ( TermVect::Iter term = terms; term.lte(); term++ )
+		(*term)->makeNameTree( pd );
+}
+
+/* Recurse into every term to resolve its name references. */
+void NfaUnion::resolveNameRefs( ParseData *pd )
+{
+	for ( TermVect::Iter term = terms; term.lte(); term++ )
+		(*term)->resolveNameRefs( pd );
+}
+
+FsmRes MachineDef::walk( ParseData *pd )
+{
+	switch ( type ) {
+	case JoinType:
+		return join->walk( pd );
+	case LongestMatchType:
+		return longestMatch->walk( pd );
+	case LengthDefType:
+		/* Towards lengths.
*/ + return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( pd->fsmCtx ) ); + case NfaUnionType: + return nfaUnion->walk( pd ); + } + return FsmRes( FsmRes::InternalError() ); +} + +void MachineDef::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case JoinType: + join->makeNameTree( pd ); + break; + case LongestMatchType: + longestMatch->makeNameTree( pd ); + break; + case LengthDefType: + break; + case NfaUnionType: + nfaUnion->makeNameTree( pd ); + break; + } +} + +void MachineDef::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case JoinType: + join->resolveNameRefs( pd ); + break; + case LongestMatchType: + longestMatch->resolveNameRefs( pd ); + break; + case LengthDefType: + break; + case NfaUnionType: + nfaUnion->resolveNameRefs( pd ); + break; + } +} + +MachineDef::~MachineDef() +{ + if ( join != 0 ) + delete join; + if ( longestMatch != 0 ) + delete longestMatch; + if ( lengthDef != 0 ) + delete lengthDef; + if ( nfaUnion != 0 ) + delete nfaUnion; +} + +/* Construct with a location and the first expression. */ +Join::Join( const InputLoc &loc, Expression *expr ) +: + loc(loc) +{ + exprList.append( expr ); +} + +/* Construct with a location and the first expression. */ +Join::Join( Expression *expr ) +{ + exprList.append( expr ); +} + +/* Walk an expression node. */ +FsmRes Join::walk( ParseData *pd ) +{ + if ( exprList.length() == 1 ) + return exprList.head->walk( pd ); + + return walkJoin( pd ); +} + +/* There is a list of expressions to join. */ +FsmRes Join::walkJoin( ParseData *pd ) +{ + /* We enter into a new name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Evaluate the machines. */ + FsmAp **fsms = new FsmAp*[exprList.length()]; + ExprList::Iter expr = exprList; + for ( int e = 0; e < exprList.length(); e++, expr++ ) { + FsmRes res = expr->walk( pd ); + if ( !res.success() ) + return res; + fsms[e] = res.fsm; + } + + /* Get the start and final names. Final is + * guaranteed to exist, start is not. 
*/ + NameInst *startName = pd->curNameInst->start; + NameInst *finalName = pd->curNameInst->final; + + int startId = -1; + if ( startName != 0 ) { + /* Take note that there was an implicit link to the start machine. */ + pd->localNameScope->referencedNames.append( startName ); + startId = startName->id; + } + + /* A final id of -1 indicates there is no epsilon that references the + * final state, therefor do not create one or set an entry point to it. */ + int finalId = -1; + if ( finalName->numRefs > 0 ) + finalId = finalName->id; + + /* Join machines 1 and up onto machine 0. */ + FsmRes res = FsmAp::joinOp( fsms[0], startId, finalId, fsms+1, exprList.length()-1 ); + if ( !res.success() ) + return res; + + /* We can now unset entry points that are not longer used. */ + pd->unsetObsoleteEntries( res.fsm ); + + /* Pop the name scope. */ + pd->popNameScope( nameFrame ); + + delete[] fsms; + return res; +} + +void Join::makeNameTree( ParseData *pd ) +{ + if ( exprList.length() > 1 ) { + /* Create the new anonymous scope. */ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, std::string(), false ); + + /* Join scopes need an implicit "final" target. */ + pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final", + pd->nextNameId++, false ); + + /* Recurse into all expressions in the list. */ + for ( ExprList::Iter expr = exprList; expr.lte(); expr++ ) + expr->makeNameTree( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; + } + else { + /* Recurse into the single expression. */ + exprList.head->makeNameTree( pd ); + } +} + + +void Join::resolveNameRefs( ParseData *pd ) +{ + /* Branch on whether or not there is to be a join. */ + if ( exprList.length() > 1 ) { + /* The variable definition enters a new scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* The join scope must contain a start label. 
*/ + NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true ); + if ( resolved.length() > 0 ) { + /* Take the first. */ + pd->curNameInst->start = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. */ + pd->id->error(loc) << "join operation has multiple start labels" << endl; + pd->errorStateLabels( resolved ); + } + } + + /* Make sure there is a start label. */ + if ( pd->curNameInst->start != 0 ) { + /* There is an implicit reference to start name. */ + pd->curNameInst->start->numRefs += 1; + } + else { + /* No start label. */ + pd->id->error(loc) << "join operation has no start label" << endl; + } + + /* Recurse into all expressions in the list. */ + for ( ExprList::Iter expr = exprList; expr.lte(); expr++ ) + expr->resolveNameRefs( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->popNameScope( nameFrame ); + } + else { + /* Recurse into the single expression. */ + exprList.head->resolveNameRefs( pd ); + } +} + +/* Clean up after an expression node. */ +Expression::~Expression() +{ + if ( expression ) + delete expression; + if ( term ) + delete term; +} + +/* Evaluate a single expression node. */ +FsmRes Expression::walk( ParseData *pd, bool lastInSeq ) +{ + switch ( type ) { + case OrType: { + /* Evaluate the expression. */ + FsmRes exprFsm = expression->walk( pd, false ); + if ( !exprFsm.success() ) + return exprFsm; + + /* Evaluate the term. */ + FsmRes rhs = term->walk( pd ); + if ( !rhs.success() ) + return rhs; + + /* Perform union. */ + FsmRes res = FsmAp::unionOp( exprFsm.fsm, rhs.fsm, lastInSeq ); + if ( !res.success() ) + return res; + + return res; + } + case IntersectType: { + /* Evaluate the expression. */ + FsmRes exprFsm = expression->walk( pd ); + if ( !exprFsm.success() ) + return exprFsm; + + /* Evaluate the term. */ + FsmRes rhs = term->walk( pd ); + if ( !rhs.success() ) + return rhs; + + /* Perform intersection. 
*/ + FsmRes res = FsmAp::intersectOp( exprFsm.fsm, rhs.fsm, lastInSeq ); + if ( !res.success() ) + return res; + + return res; + } + case SubtractType: { + /* Evaluate the expression. */ + FsmRes exprFsm = expression->walk( pd ); + if ( !exprFsm.success() ) + return exprFsm; + + /* Evaluate the term. */ + FsmRes rhs = term->walk( pd ); + if ( !rhs.success() ) + return rhs; + + /* Perform subtraction. */ + FsmRes res = FsmAp::subtractOp( exprFsm.fsm, rhs.fsm, lastInSeq ); + if ( !res.success() ) + return res; + + return res; + } + case StrongSubtractType: { + /* Evaluate the expression. */ + FsmRes exprFsm = expression->walk( pd ); + if ( !exprFsm.success() ) + return exprFsm; + + FsmAp *leadAnyStar = FsmAp::dotStarFsm( pd->fsmCtx ); + FsmAp *trailAnyStar = FsmAp::dotStarFsm( pd->fsmCtx ); + + /* Evaluate the term and pad it with any* machines. */ + FsmRes termFsm = term->walk( pd ); + if ( !termFsm.success() ) + return termFsm; + + FsmRes res1 = FsmAp::concatOp( leadAnyStar, termFsm.fsm ); + if ( !res1.success() ) + return res1; + + FsmRes res2 = FsmAp::concatOp( res1.fsm, trailAnyStar ); + if ( !res2.success() ) + return res2; + + /* Perform subtraction. */ + FsmRes res3 = FsmAp::subtractOp( exprFsm.fsm, res2.fsm, lastInSeq ); + if ( !res3.success() ) + return res3; + + return res3; + } + case TermType: { + /* Return result of the term. */ + return term->walk( pd ); + } + case BuiltinType: { + /* Construct the builtin. 
*/ + return FsmRes( FsmRes::Fsm(), makeBuiltin( builtin, pd ) ); + } + } + + return FsmRes( FsmRes::InternalError() ); +} + +void Expression::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case OrType: + case IntersectType: + case SubtractType: + case StrongSubtractType: + expression->makeNameTree( pd ); + term->makeNameTree( pd ); + break; + case TermType: + term->makeNameTree( pd ); + break; + case BuiltinType: + break; + } +} + +void Expression::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case OrType: + case IntersectType: + case SubtractType: + case StrongSubtractType: + expression->resolveNameRefs( pd ); + term->resolveNameRefs( pd ); + break; + case TermType: + term->resolveNameRefs( pd ); + break; + case BuiltinType: + break; + } +} + +/* Clean up after a term node. */ +Term::~Term() +{ + if ( term ) + delete term; + if ( factorWithAug ) + delete factorWithAug; +} + +/* Evaluate a term node. */ +FsmRes Term::walk( ParseData *pd, bool lastInSeq ) +{ + switch ( type ) { + case ConcatType: { + /* Evaluate the Term. */ + FsmRes termFsm = term->walk( pd, false ); + if ( !termFsm.success() ) + return termFsm; + + /* Evaluate the FactorWithRep. */ + FsmRes rhs = factorWithAug->walk( pd ); + if ( !rhs.success() ) { + delete termFsm.fsm; + return rhs; + } + + /* Perform concatenation. */ + FsmRes res = FsmAp::concatOp( termFsm.fsm, rhs.fsm, lastInSeq ); + if ( !res.success() ) + return res; + + return res; + } + case RightStartType: { + /* Evaluate the Term. */ + FsmRes termFsm = term->walk( pd ); + if ( !termFsm.success() ) + return termFsm; + + /* Evaluate the FactorWithRep. */ + FsmRes rhs = factorWithAug->walk( pd ); + if ( !rhs.success() ) { + delete termFsm.fsm; + return rhs; + } + + /* Perform concatenation. */ + FsmRes res = FsmAp::rightStartConcatOp( termFsm.fsm, rhs.fsm, lastInSeq ); + if ( !res.success() ) + return res; + + return res; + } + case RightFinishType: { + /* Evaluate the Term. 
*/ + FsmRes termFsm = term->walk( pd ); + if ( !termFsm.success() ) + return termFsm; + + /* Evaluate the FactorWithRep. */ + FsmRes rhs = factorWithAug->walk( pd ); + if ( !rhs.success() ) { + delete termFsm.fsm; + return rhs; + } + + /* Set up the priority descriptors. The left machine gets the + * lower priority where as the finishing transitions to the right + * get the higher priority. */ + priorDescs[0].key = pd->fsmCtx->nextPriorKey++; + priorDescs[0].priority = 0; + termFsm.fsm->allTransPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[0] ); + + /* The finishing transitions of the right machine get the higher + * priority. Use the same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 1; + rhs.fsm->finishFsmPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[1] ); + + /* If the right machine's start state is final we need to guard + * against the left machine persisting by moving through the empty + * string. */ + if ( rhs.fsm->startState->isFinState() ) { + rhs.fsm->startState->outPriorTable.setPrior( + pd->fsmCtx->curPriorOrd++, &priorDescs[1] ); + } + + /* Perform concatenation. */ + FsmRes res = FsmAp::concatOp( termFsm.fsm, rhs.fsm, lastInSeq ); + if ( !res.success() ) + return res; + + return res; + } + case LeftType: { + /* Evaluate the Term. */ + FsmRes termFsm = term->walk( pd ); + if ( !termFsm.success() ) + return termFsm; + + /* Evaluate the FactorWithRep. */ + FsmRes rhs = factorWithAug->walk( pd ); + if ( !rhs.success() ) { + delete termFsm.fsm; + return rhs; + } + + /* Set up the priority descriptors. The left machine gets the + * higher priority. */ + priorDescs[0].key = pd->fsmCtx->nextPriorKey++; + priorDescs[0].priority = 1; + termFsm.fsm->allTransPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[0] ); + + /* The right machine gets the lower priority. We cannot use + * allTransPrior here in case the start state of the right machine + * is final. 
It would allow the right machine thread to run along + * with the left if just passing through the start state. Using + * startFsmPrior prevents this. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 0; + rhs.fsm->startFsmPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + FsmRes res = FsmAp::concatOp( termFsm.fsm, rhs.fsm, lastInSeq ); + if ( !res.success() ) + return res; + + return res; + } + case FactorWithAugType: { + return factorWithAug->walk( pd ); + } + } + return FsmRes( FsmRes::InternalError() ); +} + +void Term::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + term->makeNameTree( pd ); + factorWithAug->makeNameTree( pd ); + break; + case FactorWithAugType: + factorWithAug->makeNameTree( pd ); + break; + } +} + +void Term::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + term->resolveNameRefs( pd ); + factorWithAug->resolveNameRefs( pd ); + break; + case FactorWithAugType: + factorWithAug->resolveNameRefs( pd ); + break; + } +} + +/* Clean up after a factor with augmentation node. */ +FactorWithAug::~FactorWithAug() +{ + delete factorWithRep; + + /* Walk the vector of parser actions, deleting function names. */ + + /* Clean up priority descriptors. */ + if ( priorDescs != 0 ) + delete[] priorDescs; +} + +void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd ) +{ + /* Assign actions. */ + for ( int i = 0; i < actions.length(); i++ ) { + switch ( actions[i].type ) { + /* Transition actions. 
*/ + case at_start: + graph->startFsmAction( actionOrd[i], actions[i].action ); + break; + case at_all: + graph->allTransAction( actionOrd[i], actions[i].action ); + break; + case at_finish: + graph->finishFsmAction( actionOrd[i], actions[i].action ); + break; + case at_leave: + graph->leaveFsmAction( actionOrd[i], actions[i].action ); + break; + + /* Global error actions. */ + case at_start_gbl_error: + graph->startErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_all_gbl_error: + graph->allErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_final_gbl_error: + graph->finalErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_not_start_gbl_error: + graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_not_final_gbl_error: + graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_middle_gbl_error: + graph->middleErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + + /* Local error actions. */ + case at_start_local_error: + graph->startErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_all_local_error: + graph->allErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_final_local_error: + graph->finalErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_not_start_local_error: + graph->notStartErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_not_final_local_error: + graph->notFinalErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_middle_local_error: + graph->middleErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + + /* EOF actions. 
*/ + case at_start_eof: + graph->startEOFAction( actionOrd[i], actions[i].action ); + break; + case at_all_eof: + graph->allEOFAction( actionOrd[i], actions[i].action ); + break; + case at_final_eof: + graph->finalEOFAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_eof: + graph->notStartEOFAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_eof: + graph->notFinalEOFAction( actionOrd[i], actions[i].action ); + break; + case at_middle_eof: + graph->middleEOFAction( actionOrd[i], actions[i].action ); + break; + + /* To State Actions. */ + case at_start_to_state: + graph->startToStateAction( actionOrd[i], actions[i].action ); + break; + case at_all_to_state: + graph->allToStateAction( actionOrd[i], actions[i].action ); + break; + case at_final_to_state: + graph->finalToStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_to_state: + graph->notStartToStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_to_state: + graph->notFinalToStateAction( actionOrd[i], actions[i].action ); + break; + case at_middle_to_state: + graph->middleToStateAction( actionOrd[i], actions[i].action ); + break; + + /* From State Actions. */ + case at_start_from_state: + graph->startFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_all_from_state: + graph->allFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_final_from_state: + graph->finalFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_from_state: + graph->notStartFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_from_state: + graph->notFinalFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_middle_from_state: + graph->middleFromStateAction( actionOrd[i], actions[i].action ); + break; + + /* Remaining cases, prevented by the parser. 
*/ + default: + assert( false ); + break; + } + } +} + +void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd ) +{ + /* Assign priorities. */ + for ( int i = 0; i < priorityAugs.length(); i++ ) { + switch ( priorityAugs[i].type ) { + case at_start: + graph->startFsmPrior( priorOrd[i], &priorDescs[i]); + break; + case at_all: + graph->allTransPrior( priorOrd[i], &priorDescs[i] ); + break; + case at_finish: + graph->finishFsmPrior( priorOrd[i], &priorDescs[i] ); + break; + case at_leave: + graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] ); + break; + + default: + /* Parser Prevents this case. */ + break; + } + } +} + +void FactorWithAug::assignConditions( FsmAp *graph ) +{ + for ( int i = 0; i < conditions.length(); i++ ) { + switch ( conditions[i].type ) { + /* Transition actions. */ + case at_start: + graph->startFsmCondition( conditions[i].action, conditions[i].sense ); + break; + case at_all: + graph->allTransCondition( conditions[i].action, conditions[i].sense ); + break; + case at_leave: + graph->leaveFsmCondition( conditions[i].action, conditions[i].sense ); + break; + default: + break; + } + } +} + +/* Evaluate a factor with augmentation node. */ +FsmRes FactorWithAug::walk( ParseData *pd ) +{ + /* Enter into the scopes created for the labels. */ + NameFrame nameFrame = pd->enterNameScope( false, labels.size() ); + + /* Make the array of function orderings. */ + int *actionOrd = 0; + if ( actions.length() > 0 ) + actionOrd = new int[actions.length()]; + + /* First walk the list of actions, assigning order to all starting + * actions. */ + for ( int i = 0; i < actions.length(); i++ ) { + if ( actions[i].type == at_start || + actions[i].type == at_start_gbl_error || + actions[i].type == at_start_local_error || + actions[i].type == at_start_to_state || + actions[i].type == at_start_from_state || + actions[i].type == at_start_eof ) + actionOrd[i] = pd->fsmCtx->curActionOrd++; + } + + /* Evaluate the factor with repetition. 
*/ + FsmRes factorTree = factorWithRep->walk( pd ); + if ( !factorTree.success() ) { + delete [] actionOrd; + return factorTree; + } + + FsmAp *rtnVal = factorTree.fsm; + + /* Compute the remaining action orderings. */ + for ( int i = 0; i < actions.length(); i++ ) { + if ( actions[i].type != at_start && + actions[i].type != at_start_gbl_error && + actions[i].type != at_start_local_error && + actions[i].type != at_start_to_state && + actions[i].type != at_start_from_state && + actions[i].type != at_start_eof ) + actionOrd[i] = pd->fsmCtx->curActionOrd++; + } + + /* Embed conditions. */ + assignConditions( rtnVal ); + + /* Embed actions. */ + assignActions( pd, rtnVal , actionOrd ); + + /* Make the array of priority orderings. Orderings are local to this walk + * of the factor with augmentation. */ + int *priorOrd = 0; + if ( priorityAugs.length() > 0 ) + priorOrd = new int[priorityAugs.length()]; + + /* Walk all priorities, assigning the priority ordering. */ + for ( int i = 0; i < priorityAugs.length(); i++ ) + priorOrd[i] = pd->fsmCtx->curPriorOrd++; + + /* If the priority descriptors have not been made, make them now. Make + * priority descriptors for each priority asignment that will be passed to + * the fsm. Used to keep track of the key, value and used bit. */ + if ( priorDescs == 0 && priorityAugs.length() > 0 ) { + priorDescs = new PriorDesc[priorityAugs.length()]; + for ( int i = 0; i < priorityAugs.length(); i++ ) { + /* Init the prior descriptor for the priority setting. */ + priorDescs[i].key = priorityAugs[i].priorKey; + priorDescs[i].priority = priorityAugs[i].priorValue; + priorDescs[i].guarded = false; + priorDescs[i].guardId = 0; + } + } + + /* Assign priorities into the machine. */ + assignPriorities( rtnVal, priorOrd ); + + /* Assign epsilon transitions. */ + for ( int e = 0; e < epsilonLinks.length(); e++ ) { + /* Get the name, which may not exist. If it doesn't then silently + * ignore it because an error has already been reported. 
*/ + NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++]; + if ( epTarg != 0 ) { + /* Make the epsilon transitions. */ + rtnVal->epsilonTrans( epTarg->id ); + + /* Note that we have made a link to the name. */ + pd->localNameScope->referencedNames.append( epTarg ); + } + } + + /* Set entry points for labels. */ + if ( labels.size() > 0 ) { + /* Pop the names. */ + pd->resetNameScope( nameFrame ); + + /* Make labels that are referenced into entry points. */ + for ( size_t i = 0; i < labels.size(); i++ ) { + pd->enterNameScope( false, 1 ); + + /* Will always be found. */ + NameInst *name = pd->curNameInst; + + /* If the name is referenced then set the entry point. */ + if ( name->numRefs > 0 ) + rtnVal->setEntry( name->id, rtnVal->startState ); + + if ( labels[i].cut ) + pd->cuts.append( ParseData::Cut( labels[i].data, name->id ) ); + } + + pd->popNameScope( nameFrame ); + } + + if ( priorOrd != 0 ) + delete[] priorOrd; + if ( actionOrd != 0 ) + delete[] actionOrd; + return FsmRes( FsmRes::Fsm(), rtnVal ); +} + +void FactorWithAug::makeNameTree( ParseData *pd ) +{ + /* Add the labels to the tree of instantiated names. Each label + * makes a new scope. */ + NameInst *prevNameInst = pd->curNameInst; + for ( size_t i = 0; i < labels.size(); i++ ) { + pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true ); + + if ( labels[i].cut ) + pd->curNameInst->numRefs += 1; + } + + /* Recurse, then pop the names. */ + factorWithRep->makeNameTree( pd ); + pd->curNameInst = prevNameInst; +} + + +void FactorWithAug::resolveNameRefs( ParseData *pd ) +{ + /* Enter into the name scope created by any labels. */ + NameFrame nameFrame = pd->enterNameScope( false, labels.size() ); + + /* Note action references. */ + for ( int i = 0; i < actions.length(); i++ ) + actions[i].action->embedRoots.append( pd->localNameScope ); + + /* Recurse first. IMPORTANT: we must do the exact same traversal as when + * the tree is constructed. 
*/ + factorWithRep->resolveNameRefs( pd ); + + /* Resolve epsilon transitions. */ + for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) { + /* Get the link. */ + EpsilonLink &link = epsilonLinks[ep]; + NameInst *resolvedName = 0; + + if ( link.target->length() == 1 && link.target->data[0] == "final" ) { + /* Epsilon drawn to an implicit final state. An implicit final is + * only available in join operations. */ + resolvedName = pd->localNameScope->final; + } + else { + /* Do an search for the name. */ + NameSet resolved; + pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 ); + if ( resolved.length() > 0 ) { + /* Take the first one. */ + resolvedName = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. */ + pd->id->error(link.loc) << "state reference " << link.target << + " resolves to multiple entry points" << endl; + pd->errorStateLabels( resolved ); + } + } + } + + /* This is tricky, we stuff resolved epsilon transitions into one long + * vector in the parse data structure. Since the name resolution and + * graph generation both do identical walks of the parse tree we + * should always find the link resolutions in the right place. */ + pd->epsilonResolvedLinks.append( resolvedName ); + + if ( resolvedName != 0 ) { + /* Found the name, bump of the reference count on it. */ + resolvedName->numRefs += 1; + } + else { + /* Complain, no recovery action, the epsilon op will ignore any + * epsilon transitions whose names did not resolve. */ + pd->id->error(link.loc) << "could not resolve label " << link.target << endl; + } + } + + if ( labels.size() > 0 ) + pd->popNameScope( nameFrame ); +} + + +/* Clean up after a factor with repetition node. 
*/ +FactorWithRep::~FactorWithRep() +{ + switch ( type ) { + case StarType: case StarStarType: case OptionalType: case PlusType: + case ExactType: case MaxType: case MinType: case RangeType: + delete factorWithRep; + case FactorWithNegType: + delete factorWithNeg; + break; + } +} + + +/* Evaluate a factor with repetition node. */ +FsmRes FactorWithRep::walk( ParseData *pd ) +{ + switch ( type ) { + case StarType: { + /* Evaluate the FactorWithRep. */ + FsmRes factorTree = factorWithRep->walk( pd ); + if ( !factorTree.success() ) + return factorTree; + + if ( factorTree.fsm->startState->isFinState() ) { + pd->id->warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + factorTree.fsm->unsetFinState( factorTree.fsm->startState ); + } + + return FsmAp::starOp( factorTree.fsm ); + } + case StarStarType: { + /* Evaluate the FactorWithRep. */ + FsmRes factorTree = factorWithRep->walk( pd ); + if ( !factorTree.success() ) + return factorTree; + + if ( factorTree.fsm->startState->isFinState() ) { + pd->id->warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + /* Set up the prior descs. All gets priority one, whereas leaving gets + * priority zero. Make a unique key so that these priorities don't + * interfere with any priorities set by the user. */ + priorDescs[0].key = pd->fsmCtx->nextPriorKey++; + priorDescs[0].priority = 1; + factorTree.fsm->allTransPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[0] ); + + /* Leaveing gets priority 0. Use same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 0; + factorTree.fsm->leaveFsmPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[1] ); + + return FsmAp::starOp( factorTree.fsm ); + } + case OptionalType: { + /* Evaluate the FactorWithRep. 
*/ + FsmRes factorTree = factorWithRep->walk( pd ); + if ( !factorTree.success() ) + return factorTree; + + return FsmAp::questionOp( factorTree.fsm ); + } + case PlusType: { + /* Evaluate the FactorWithRep. */ + FsmRes factorTree = factorWithRep->walk( pd ); + if ( !factorTree.success() ) + return factorTree; + + if ( factorTree.fsm->startState->isFinState() ) { + pd->id->warning(loc) << "applying plus operator to a machine that " + "accepts zero length word" << endl; + } + + return FsmAp::plusOp( factorTree.fsm ); + } + case ExactType: { + /* Evaluate the first FactorWithRep. */ + FsmRes factorTree = factorWithRep->walk( pd ); + if ( !factorTree.success() ) + return factorTree; + + /* Get an int from the repetition amount. */ + if ( lowerRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. + * This Defeats the purpose so give a warning. */ + pd->id->warning(loc) << "exactly zero repetitions results " + "in the null machine" << endl; + } + else { + if ( factorTree.fsm->startState->isFinState() ) { + pd->id->warning(loc) << "applying repetition to a machine that " + "accepts zero length word" << endl; + } + } + + /* Handles the n == 0 case. */ + return FsmAp::exactRepeatOp( factorTree.fsm, lowerRep ); + } + case MaxType: { + /* Evaluate the first FactorWithRep. */ + FsmRes factorTree = factorWithRep->walk( pd ); + if ( !factorTree.success() ) + return factorTree; + + /* Get an int from the repetition amount. */ + if ( upperRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. + * This Defeats the purpose so give a warning. */ + pd->id->warning(loc) << "max zero repetitions results " + "in the null machine" << endl; + + return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( pd->fsmCtx ) ); + } + else { + + if ( factorTree.fsm->startState->isFinState() ) { + pd->id->warning(loc) << "applying max repetition to a machine that " + "accepts zero length word" << endl; + } + } + + /* Do the repetition on the machine. Handles the n == 0 case. 
*/ + return FsmAp::maxRepeatOp( factorTree.fsm, upperRep ); + } + case MinType: { + /* Evaluate the repeated machine. */ + FsmRes factorTree = factorWithRep->walk( pd ); + if ( !factorTree.success() ) + return factorTree; + + if ( factorTree.fsm->startState->isFinState() ) { + pd->id->warning(loc) << "applying min repetition to a machine that " + "accepts zero length word" << endl; + } + + return FsmAp::minRepeatOp( factorTree.fsm, lowerRep ); + } + case RangeType: { + /* Check for bogus range. */ + if ( upperRep - lowerRep < 0 ) { + pd->id->error(loc) << "invalid range repetition" << endl; + + /* Return null machine as recovery. */ + return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( pd->fsmCtx ) ); + } + + /* Now need to evaluate the repeated machine. */ + FsmRes factorTree = factorWithRep->walk( pd ); + if ( !factorTree.success() ) + return factorTree; + + if ( lowerRep == 0 && upperRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. This + * defeats the purpose so give a warning. */ + pd->id->warning(loc) << "zero to zero repetitions results " + "in the null machine" << endl; + } + else { + + if ( factorTree.fsm->startState->isFinState() ) { + pd->id->warning(loc) << "applying range repetition to a machine that " + "accepts zero length word" << endl; + } + + } + return FsmAp::rangeRepeatOp( factorTree.fsm, lowerRep, upperRep ); + } + case FactorWithNegType: { + /* Evaluate the Factor. Pass it up. 
*/ + return factorWithNeg->walk( pd ); + }} + return FsmRes( FsmRes::InternalError() ); +} + +void FactorWithRep::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case StarType: + case StarStarType: + case OptionalType: + case PlusType: + case ExactType: + case MaxType: + case MinType: + case RangeType: + factorWithRep->makeNameTree( pd ); + break; + case FactorWithNegType: + factorWithNeg->makeNameTree( pd ); + break; + } +} + +void FactorWithRep::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case StarType: + case StarStarType: + case OptionalType: + case PlusType: + case ExactType: + case MaxType: + case MinType: + case RangeType: + factorWithRep->resolveNameRefs( pd ); + break; + case FactorWithNegType: + factorWithNeg->resolveNameRefs( pd ); + break; + } +} + +/* Clean up after a factor with negation node. */ +FactorWithNeg::~FactorWithNeg() +{ + switch ( type ) { + case NegateType: + case CharNegateType: + delete factorWithNeg; + break; + case FactorType: + delete factor; + break; + } +} + +/* Evaluate a factor with negation node. A failed operand result is returned + * unchanged rather than being passed on to the subtraction. */ +FsmRes FactorWithNeg::walk( ParseData *pd ) +{ + switch ( type ) { + case NegateType: { + /* Evaluate the factorWithNeg. */ + FsmRes toNegate = factorWithNeg->walk( pd ); + if ( !toNegate.success() ) + return toNegate; + + /* Negation is subtract from dot-star. */ + FsmAp *ds = FsmAp::dotStarFsm( pd->fsmCtx ); + FsmRes res = FsmAp::subtractOp( ds, toNegate.fsm ); + + return res; + } + case CharNegateType: { + /* Evaluate the factorWithNeg. */ + FsmRes toNegate = factorWithNeg->walk( pd ); + if ( !toNegate.success() ) + return toNegate; + + /* CharNegation is subtract from dot. */ + FsmAp *ds = FsmAp::dotFsm( pd->fsmCtx ); + FsmRes res = FsmAp::subtractOp( ds, toNegate.fsm ); + + return res; + } + case FactorType: { + /* Evaluate the Factor. Pass it up. 
*/ + return factor->walk( pd ); + }} + return FsmRes( FsmRes::InternalError() ); +} + +void FactorWithNeg::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case NegateType: + case CharNegateType: + factorWithNeg->makeNameTree( pd ); + break; + case FactorType: + factor->makeNameTree( pd ); + break; + } +} + +void FactorWithNeg::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case NegateType: + case CharNegateType: + factorWithNeg->resolveNameRefs( pd ); + break; + case FactorType: + factor->resolveNameRefs( pd ); + break; + } +} + +/* Clean up after a factor node. */ +Factor::~Factor() +{ + switch ( type ) { + case LiteralType: + delete literal; + break; + case RangeType: + delete range; + break; + case OrExprType: + delete reItem; + break; + case RegExprType: + delete regExpr; + break; + case ReferenceType: + break; + case ParenType: + delete join; + break; + case LongestMatchType: + delete longestMatch; + break; + case NfaWrap: case NfaRep: + case CondStar: case CondPlus: + delete expression; + break; + } +} + + +/* Evaluate a factor node. 
*/ +FsmRes Factor::walk( ParseData *pd ) +{ + /* Dispatch on the node type. The simple node types forward to the walk of + * the child they hold. The expression-based operators walk the operand + * first and hand a failed result straight back to the caller. */ + switch ( type ) { + case LiteralType: + return FsmRes( FsmRes::Fsm(), literal->walk( pd ) ); + case RangeType: + return FsmRes( FsmRes::Fsm(), range->walk( pd ) ); + case OrExprType: + return reItem->walk( pd, 0 ); + case RegExprType: + return FsmRes( FsmRes::Fsm(), regExpr->walk( pd, 0 ) ); + case ReferenceType: + return varDef->walk( pd ); + case ParenType: + return join->walk( pd ); + case LongestMatchType: + return longestMatch->walk( pd ); + case NfaRep: { + FsmRes exprTree = expression->walk( pd ); + if ( !exprTree.success() ) + return exprTree; + + if ( mode == Factor::NfaLegacy ) { + FsmRes res = FsmAp::nfaRepeatOp( exprTree.fsm, action1, action2, action3, + action4, action5, action6 ); + + res.fsm->verifyIntegrity(); + return res; + } + else if ( mode == Factor::NfaLazy ) { + FsmRes res = FsmAp::nfaRepeatOp2( exprTree.fsm, action1, action2, action3, + action4, action5, action6, FsmAp::NfaLazy ); + + res.fsm->verifyIntegrity(); + return res; + } + else { + FsmRes res = FsmAp::nfaRepeatOp2( exprTree.fsm, action1, action2, action3, + action4, action5, action6, FsmAp::NfaGreedy ); + + res.fsm->verifyIntegrity(); + return res; + } + } + case NfaWrap: { + FsmRes exprTree = expression->walk( pd ); + if ( !exprTree.success() ) + return exprTree; + + if ( mode == Factor::NfaLazy ) { + FsmRes res = FsmAp::nfaWrap( exprTree.fsm, action1, action2, action3, + action4, /* action5, */ action6, FsmAp::NfaLazy ); + + res.fsm->verifyIntegrity(); + return res; + } + else { + FsmRes res = FsmAp::nfaWrap( exprTree.fsm, action1, action2, action3, + action4, /* action5, */ action6, FsmAp::NfaGreedy ); + + res.fsm->verifyIntegrity(); + return res; + } + } + case CondStar: { + FsmRes exprTree = expression->walk( pd ); + if ( !exprTree.success() ) + return exprTree; + + /* NOTE(review): the warning text below says "plus operator" although + * this is the CondStar case -- confirm whether the wording is intended. */ + if ( exprTree.fsm->startState->isFinState() ) { + pd->id->warning(loc) << "applying plus operator to a machine that " + "accepts zero length word" << endl; + } + + return FsmAp::condStar( exprTree.fsm, repId, action1, action2, action3, action4 ); + } + case CondPlus: 
{ + FsmRes exprTree = expression->walk( pd ); + if ( !exprTree.success() ) + return exprTree; + + if ( exprTree.fsm->startState->isFinState() ) { + pd->id->warning(loc) << "applying plus operator to a machine that " + "accepts zero length word" << endl; + } + + return FsmAp::condPlus( exprTree.fsm, repId, action1, action2, action3, action4 ); + }} + + return FsmRes( FsmRes::InternalError() ); +} + +void Factor::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case LiteralType: + case RangeType: + case OrExprType: + case RegExprType: + break; + case ReferenceType: + varDef->makeNameTree( loc, pd ); + break; + case ParenType: + join->makeNameTree( pd ); + break; + case LongestMatchType: + longestMatch->makeNameTree( pd ); + break; + case NfaWrap: + case NfaRep: + case CondStar: + case CondPlus: + expression->makeNameTree( pd ); + break; + } +} + +void Factor::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case LiteralType: + case RangeType: + case OrExprType: + case RegExprType: + break; + case ReferenceType: + varDef->resolveNameRefs( pd ); + break; + case ParenType: + join->resolveNameRefs( pd ); + break; + case LongestMatchType: + longestMatch->resolveNameRefs( pd ); + break; + case NfaRep: + case NfaWrap: + case CondStar: + case CondPlus: + expression->resolveNameRefs( pd ); + break; + } +} + +/* Clean up a range object. Must delete the two literals. */ +Range::~Range() +{ + delete lowerLit; + delete upperLit; +} + +/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */ +FsmAp *Range::walk( ParseData *pd ) +{ + /* Construct and verify the suitability of the lower end of the range. */ + FsmAp *lowerFsm = lowerLit->walk( pd ); + if ( !lowerFsm->checkSingleCharMachine() ) { + pd->id->error(lowerLit->loc) << + "bad range lower end, must be a single character" << endl; + } + + /* Construct and verify the upper end. 
*/ + FsmAp *upperFsm = upperLit->walk( pd ); + if ( !upperFsm->checkSingleCharMachine() ) { + pd->id->error(upperLit->loc) << + "bad range upper end, must be a single character" << endl; + } + + /* Grab the keys from the machines, then delete them. */ + Key lowKey = lowerFsm->startState->outList.head->lowKey; + Key highKey = upperFsm->startState->outList.head->lowKey; + delete lowerFsm; + delete upperFsm; + + /* Validate the range. */ + if ( pd->fsmCtx->keyOps->gt( lowKey, highKey ) ) { + /* Recover by setting upper to lower; */ + pd->id->error(lowerLit->loc) << "lower end of range is greater then upper end" << endl; + highKey = lowKey; + } + + /* Return the range now that it is validated. */ + FsmAp *retFsm; + if ( caseIndep ) + retFsm = FsmAp::rangeFsmCI( pd->fsmCtx, lowKey, highKey ); + else + retFsm = FsmAp::rangeFsm( pd->fsmCtx, lowKey, highKey ); + + return retFsm; +} + +/* Evaluate a literal object. */ +FsmAp *Literal::walk( ParseData *pd ) +{ + /* FsmAp to return, is the alphabet signed. */ + FsmAp *rtnVal = 0; + + switch ( type ) { + case Number: { + /* Make a C string. Maybe put - up front. */ + Vector<char> num = data; + if ( neg ) + num.insert( 0, '-' ); + num.append( 0 ); + + /* Make the fsm key in int format. */ + Key fsmKey = makeFsmKeyNum( num.data, loc, pd ); + + /* Make the new machine. */ + rtnVal = FsmAp::concatFsm( pd->fsmCtx, fsmKey ); + break; + } + case LitString: { + /* Make the array of keys in int format. */ + long length; + bool caseInsensitive; + char *litstr = prepareLitString( pd->id, loc, data.data, data.length(), + length, caseInsensitive ); + Key *arr = new Key[length]; + makeFsmKeyArray( arr, litstr, length, pd ); + + /* Make the new machine. 
*/ + if ( caseInsensitive ) + rtnVal = FsmAp::concatFsmCI( pd->fsmCtx, arr, length ); + else + rtnVal = FsmAp::concatFsm( pd->fsmCtx, arr, length ); + delete[] litstr; + delete[] arr; + break; + } + case HexString: { + long length; + Key *arr = prepareHexString( pd, loc, data.data, data.length(), length ); + rtnVal = FsmAp::concatFsm( pd->fsmCtx, arr, length ); + delete[] arr; + break; + }} + return rtnVal; +} + +/* Clean up after a regular expression object. */ +RegExpr::~RegExpr() +{ + switch ( type ) { + case RecurseItem: + delete regExpr; + delete item; + break; + case Empty: + break; + } +} + +/* Evaluate a regular expression object. */ +FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex ) +{ + /* This is the root regex, pass down a pointer to this. */ + if ( rootRegex == 0 ) + rootRegex = this; + + FsmAp *rtnVal = 0; + switch ( type ) { + case RecurseItem: { + /* Walk both items. */ + rtnVal = regExpr->walk( pd, rootRegex ); + FsmRes fsm2 = item->walk( pd, rootRegex ); + FsmRes res = FsmAp::concatOp( rtnVal, fsm2.fsm ); + rtnVal = res.fsm; + break; + } + case Empty: { + rtnVal = FsmAp::lambdaFsm( pd->fsmCtx ); + break; + } + } + return rtnVal; +} + +/* Clean up after an item in a regular expression. */ +ReItem::~ReItem() +{ + switch ( type ) { + case Data: + case Dot: + break; + case OrBlock: + case NegOrBlock: + delete orBlock; + break; + } +} + +/* Evaluate a regular expression object. */ +FsmRes ReItem::walk( ParseData *pd, RegExpr *rootRegex ) +{ + /* The fsm to return, is the alphabet signed? */ + FsmAp *rtnVal = 0; + + switch ( type ) { + case Data: { + /* Move the data into an integer array and make a concat fsm. */ + Key *arr = new Key[data.length()]; + makeFsmKeyArray( arr, data.data, data.length(), pd ); + + /* Make the concat fsm. 
*/ + if ( rootRegex != 0 && rootRegex->caseInsensitive ) + rtnVal = FsmAp::concatFsmCI( pd->fsmCtx, arr, data.length() ); + else + rtnVal = FsmAp::concatFsm( pd->fsmCtx, arr, data.length() ); + delete[] arr; + break; + } + case Dot: { + /* Make the dot fsm. */ + rtnVal = FsmAp::dotFsm( pd->fsmCtx ); + break; + } + case OrBlock: { + /* Get the or block and minimize it. An empty or block walks to a null + * pointer, which is replaced by the lambda machine. */ + rtnVal = orBlock->walk( pd, rootRegex ); + if ( rtnVal == 0 ) + rtnVal = FsmAp::lambdaFsm( pd->fsmCtx ); + rtnVal->minimizePartition2(); + break; + } + case NegOrBlock: { + /* Get the or block and minimize it. As in the OrBlock case, an empty + * or block walks to a null pointer, so substitute the lambda machine + * before touching it. */ + FsmAp *fsm = orBlock->walk( pd, rootRegex ); + if ( fsm == 0 ) + fsm = FsmAp::lambdaFsm( pd->fsmCtx ); + fsm->minimizePartition2(); + + /* Make a dot fsm and subtract from it. */ + rtnVal = FsmAp::dotFsm( pd->fsmCtx ); + FsmRes res = FsmAp::subtractOp( rtnVal, fsm ); + rtnVal = res.fsm; + rtnVal->minimizePartition2(); + break; + } + } + + /* If the item is followed by a star, then apply the star op. */ + if ( star ) { + if ( rtnVal->startState->isFinState() ) { + pd->id->warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + FsmRes res = FsmAp::starOp( rtnVal ); + rtnVal = res.fsm; + rtnVal->minimizePartition2(); + } + + return FsmRes( FsmRes::Fsm(), rtnVal ); +} + +/* Clean up after an or block of a regular expression. */ +ReOrBlock::~ReOrBlock() +{ + switch ( type ) { + case RecurseItem: + delete orBlock; + delete item; + break; + case Empty: + break; + } +} + + +/* Evaluate an or block of a regular expression. */ +FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex ) +{ + FsmAp *rtnVal = 0; + switch ( type ) { + case RecurseItem: { + /* Evaluate the two fsm. 
*/ + FsmAp *fsm1 = orBlock->walk( pd, rootRegex ); + FsmAp *fsm2 = item->walk( pd, rootRegex ); + if ( fsm1 == 0 ) + rtnVal = fsm2; + else { + FsmRes res = FsmAp::unionOp( fsm1, fsm2 ); + fsm1 = res.fsm; + rtnVal = fsm1; + } + break; + } + case Empty: { + rtnVal = 0; + break; + } + } + return rtnVal;; +} + +/* Evaluate an or block item of a regular expression. */ +FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex ) +{ + KeyOps *keyOps = pd->fsmCtx->keyOps; + + /* The return value, is the alphabet signed? */ + FsmAp *rtnVal = 0; + switch ( type ) { + case Data: { + /* Put the or data into an array of ints. Note that we find unique + * keys. Duplicates are silently ignored. The alternative would be to + * issue warning or an error but since we can't with [a0-9a] or 'a' | + * 'a' don't bother here. */ + KeySet keySet( keyOps ); + makeFsmUniqueKeyArray( keySet, data.data, data.length(), + rootRegex != 0 ? rootRegex->caseInsensitive : false, pd ); + + /* Run the or operator. */ + rtnVal = FsmAp::orFsm( pd->fsmCtx, keySet.data, keySet.length() ); + break; + } + case Range: { + /* Make the upper and lower keys. */ + Key lowKey = makeFsmKeyChar( lower, pd ); + Key highKey = makeFsmKeyChar( upper, pd ); + + /* Validate the range. */ + if ( keyOps->gt( lowKey, highKey ) ) { + /* Recover by setting upper to lower; */ + pd->id->error(loc) << "lower end of range is greater then upper end" << endl; + highKey = lowKey; + } + + /* Make the range machine. */ + rtnVal = FsmAp::rangeFsm( pd->fsmCtx, lowKey, highKey ); + + if ( rootRegex != 0 && rootRegex->caseInsensitive ) { + if ( keyOps->le( lowKey, 'Z' ) && pd->fsmCtx->keyOps->le( 'A', highKey ) ) { + Key otherLow = keyOps->lt( lowKey, 'A' ) ? Key('A') : lowKey; + Key otherHigh = keyOps->lt( 'Z', highKey ) ? 
Key('Z') : highKey; + + otherLow = keyOps->add( 'a', ( keyOps->sub( otherLow, 'A' ) ) ); + otherHigh = keyOps->add( 'a', ( keyOps->sub( otherHigh, 'A' ) ) ); + + FsmAp *otherRange = FsmAp::rangeFsm( pd->fsmCtx, otherLow, otherHigh ); + FsmRes res = FsmAp::unionOp( rtnVal, otherRange ); + rtnVal = res.fsm; + rtnVal->minimizePartition2(); + } + else if ( keyOps->le( lowKey, 'z' ) && keyOps->le( 'a', highKey ) ) { + Key otherLow = keyOps->lt( lowKey, 'a' ) ? Key('a') : lowKey; + Key otherHigh = keyOps->lt( 'z', highKey ) ? Key('z') : highKey; + + otherLow = keyOps->add('A' , ( keyOps->sub( otherLow , 'a' ) )); + otherHigh = keyOps->add('A' , ( keyOps->sub( otherHigh , 'a' ) )); + + FsmAp *otherRange = FsmAp::rangeFsm( pd->fsmCtx, otherLow, otherHigh ); + FsmRes res = FsmAp::unionOp( rtnVal, otherRange ); + rtnVal = res.fsm; + rtnVal->minimizePartition2(); + } + } + + break; + }} + return rtnVal; +} diff --git a/ragel/parsetree.h b/ragel/parsetree.h new file mode 100644 index 00000000..1d4f7e6b --- /dev/null +++ b/ragel/parsetree.h @@ -0,0 +1,873 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _PARSETREE_H +#define _PARSETREE_H + +#include "ragel.h" +#include "avlmap.h" +#include "bstmap.h" +#include "vector.h" +#include "dlist.h" +#include "fsmgraph.h" + +struct NameInst; + +/* Types of builtin machines. */ +enum BuiltinMachine +{ + BT_Any, + BT_Ascii, + BT_Extend, + BT_Alpha, + BT_Digit, + BT_Alnum, + BT_Lower, + BT_Upper, + BT_Cntrl, + BT_Graph, + BT_Print, + BT_Punct, + BT_Space, + BT_Xdigit, + BT_Lambda, + BT_Empty +}; + + +struct ParseData; + +/* Leaf type. */ +struct Literal; + +/* Tree nodes. */ + +struct Term; +struct FactorWithAug; +struct FactorWithRep; +struct FactorWithNeg; +struct Factor; +struct Expression; +struct Join; +struct NfaUnion; +struct MachineDef; +struct LongestMatch; +struct LongestMatchPart; +struct LmPartList; +struct Range; +struct LengthDef; +struct colm_data; +struct colm_location; + +/* Type of augmentation. Describes locations in the machine. */ +enum AugType +{ + /* Transition actions/priorities. */ + at_start, + at_all, + at_finish, + at_leave, + + /* Global error actions. */ + at_start_gbl_error, + at_all_gbl_error, + at_final_gbl_error, + at_not_start_gbl_error, + at_not_final_gbl_error, + at_middle_gbl_error, + + /* Local error actions. */ + at_start_local_error, + at_all_local_error, + at_final_local_error, + at_not_start_local_error, + at_not_final_local_error, + at_middle_local_error, + + /* To State Action embedding. */ + at_start_to_state, + at_all_to_state, + at_final_to_state, + at_not_start_to_state, + at_not_final_to_state, + at_middle_to_state, + + /* From State Action embedding. 
*/ + at_start_from_state, + at_all_from_state, + at_final_from_state, + at_not_start_from_state, + at_not_final_from_state, + at_middle_from_state, + + /* EOF Action embedding. */ + at_start_eof, + at_all_eof, + at_final_eof, + at_not_start_eof, + at_not_final_eof, + at_middle_eof +}; + +/* IMPORTANT: These must follow the same order as the state augs in AugType + * since we will be using this to compose AugType. */ +enum StateAugType +{ + sat_start = 0, + sat_all, + sat_final, + sat_not_start, + sat_not_final, + sat_middle +}; + +struct Action; +struct PriorDesc; +struct RegExpr; +struct ReItem; +struct ReOrBlock; +struct ReOrItem; +struct ExplicitMachine; +struct InlineItem; +struct InlineList; + +/* Reference to a named state. */ +struct NameRef : public Vector<std::string> {}; +typedef Vector<NameRef*> NameRefList; +typedef Vector<NameInst*> NameTargList; + +/* Structure for storing location of epsilon transitons. */ +struct EpsilonLink +{ + EpsilonLink( const InputLoc &loc, NameRef *target ) + : loc(loc), target(target) { } + + InputLoc loc; + NameRef *target; +}; + +struct Label +{ + Label( const InputLoc &loc, std::string data ) + : loc(loc), data(data), cut(false) { } + + InputLoc loc; + std::string data; + bool cut; +}; + +/* Structrue represents an action assigned to some FactorWithAug node. The + * factor with aug will keep an array of these. 
*/ +struct ParserAction +{ + ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) + : loc(loc), type(type), localErrKey(localErrKey), action(action) { } + + InputLoc loc; + AugType type; + int localErrKey; + Action *action; +}; + +struct ConditionTest +{ + ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) : + loc(loc), type(type), action(action), sense(sense) { } + + InputLoc loc; + AugType type; + Action *action; + bool sense; +}; + +struct Token +{ + char *data; + int length; + ParserLoc loc; + + void set( const char *str, int len, colm_location *cl); + void set( colm_data *cd, colm_location *cl); + void set( const char *str, int len, const InputLoc &loc ); + void set( const char *str, int len, const ParserLoc &loc ); + +private: + void _set( const char *str, int len ); +}; + + +struct RedToken +{ + const char *data; + int length; + ParserLoc loc; + + void set( colm_data *cd, colm_location *cl); +}; + + +/* Store the value and type of a priority augmentation. */ +struct PriorityAug +{ + PriorityAug( AugType type, int priorKey, int priorValue ) : + type(type), priorKey(priorKey), priorValue(priorValue) { } + + AugType type; + int priorKey; + int priorValue; +}; + +/* + * A Variable Definition + */ +struct VarDef +{ + VarDef( std::string name, MachineDef *machineDef ) + : name(name), machineDef(machineDef), isExport(false) { } + + ~VarDef(); + + /* Parse tree traversal. */ + FsmRes walk( ParseData *pd ); + void makeNameTree( const InputLoc &loc, ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + std::string name; + MachineDef *machineDef; + bool isExport; +}; + + +/* + * LongestMatch + * + * Wherever possible the item match will execute on the character. If not + * possible the item match will execute on a lookahead character and either + * hold the current char (if one away) or backup. + * + * How to handle the problem of backing up over a buffer break? 
+ * + * Don't want to use pending out transitions for embedding item match because + * the role of item match action is different: it may sometimes match on the + * final transition, or may match on a lookahead character. + * + * Don't want to invent a new operator just for this. So just trail action + * after machine, this means we can only use literal actions. + * + * The item action may + * + * What states of the machine will be final. The item actions that wrap around + * on the last character will go straight to the start state. + * + * Some transitions will be lookahead transitions, they will hold the current + * character. Crossing them with regular transitions must be restricted + * because it does not make sense. The transition cannot simultaneously hold + * and consume the current character. + */ +struct LongestMatchPart +{ + LongestMatchPart( Join *join, Action *action, + const InputLoc &semiLoc, int longestMatchId ) + : + join(join), action(action), semiLoc(semiLoc), + longestMatchId(longestMatchId), inLmSelect(false) { } + + InputLoc getLoc(); + + Join *join; + Action *action; + InputLoc semiLoc; + + Action *setActId; + Action *actOnLast; + Action *actOnNext; + Action *actLagBehind; + Action *actNfaOnLast; + Action *actNfaOnNext; + Action *actNfaOnEof; + int longestMatchId; + bool inLmSelect; + LongestMatch *longestMatch; + + LongestMatchPart *prev, *next; +}; + +/* Declare a new type so that ptreetypes.h need not include dlist.h. */ +struct LmPartList : DList<LongestMatchPart> {}; + +struct LongestMatch +{ + /* Construct with a list of joins */ + LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) + : + loc(loc), + longestMatchList(longestMatchList), + lmSwitchHandlesError(false), + nfaConstruction(false) + { } + + InputLoc loc; + LmPartList *longestMatchList; + std::string name; + Action *lmActSelect; + bool lmSwitchHandlesError; + bool nfaConstruction; + + LongestMatch *next, *prev; + + /* Tree traversal. 
*/ + FsmRes walkClassic( ParseData *pd ); + FsmRes walk( ParseData *pd ); + + FsmRes mergeNfaStates( ParseData *pd, FsmAp *fsm ); + bool onlyOneNfa( ParseData *pd, FsmAp *fsm, StateAp *st, NfaTrans *in ); + bool matchCanFail( ParseData *pd, FsmAp *fsm, StateAp *st ); + void eliminateNfaActions( ParseData *pd, FsmAp *fsm ); + void advanceNfaActions( ParseData *pd, FsmAp *fsm ); + FsmRes buildBaseNfa( ParseData *pd ); + FsmRes walkNfa( ParseData *pd ); + + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + void transferScannerLeavingActions( FsmAp *graph ); + void runLongestMatch( ParseData *pd, FsmAp *graph ); + Action *newLmAction( ParseData *pd, const InputLoc &loc, const char *name, + InlineList *inlineList ); + void makeActions( ParseData *pd ); + void findName( ParseData *pd ); + void restart( FsmAp *graph, TransAp *trans ); + void restart( FsmAp *graph, CondAp *cond ); +}; + + +/* List of Expressions. */ +typedef DList<Expression> ExprList; + +struct MachineDef +{ + enum Type { + JoinType, + LongestMatchType, + LengthDefType, + NfaUnionType + }; + + MachineDef( Join *join ) + : join(join), longestMatch(0), lengthDef(0), nfaUnion(0), + type(JoinType) {} + + MachineDef( LongestMatch *longestMatch ) + : join(0), longestMatch(longestMatch), lengthDef(0), nfaUnion(0), + type(LongestMatchType) {} + + MachineDef( LengthDef *lengthDef ) + : join(0), longestMatch(0), lengthDef(lengthDef), nfaUnion(0), + type(LengthDefType) {} + + MachineDef( NfaUnion *nfaUnion ) + : join(0), longestMatch(0), lengthDef(0), nfaUnion(nfaUnion), + type(NfaUnionType) {} + + ~MachineDef(); + + FsmRes walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + Join *join; + LongestMatch *longestMatch; + LengthDef *lengthDef; + NfaUnion *nfaUnion; + Type type; +}; + +/* + * Join + */ +struct Join +{ + /* Construct with the first expression. 
*/ + Join( Expression *expr ); + Join( const InputLoc &loc, Expression *expr ); + + ~Join() + { + exprList.empty(); + } + + /* Tree traversal. */ + FsmRes walk( ParseData *pd ); + FsmRes walkJoin( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + /* Data. */ + InputLoc loc; + ExprList exprList; +}; + +/* + * Expression + */ +struct Expression +{ + enum Type { + OrType, + IntersectType, + SubtractType, + StrongSubtractType, + TermType, + BuiltinType + }; + + /* Construct with an expression on the left and a term on the right. */ + Expression( Expression *expression, Term *term, Type type ) : + expression(expression), term(term), + type(type), prev(this), next(this) { } + + /* Construct with only a term. */ + Expression( Term *term ) : + expression(0), term(term), + type(TermType) , prev(this), next(this) { } + + /* Construct with a builtin type. */ + Expression( BuiltinMachine builtin ) : + expression(0), term(0), builtin(builtin), + type(BuiltinType), prev(this), next(this) { } + + ~Expression(); + + /* Tree traversal. */ + FsmRes walk( ParseData *pd, bool lastInSeq = true ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + /* Node data. */ + Expression *expression; + Term *term; + BuiltinMachine builtin; + Type type; + + Expression *prev, *next; +}; + +typedef Vector<Term*> TermVect; + +/* + * NfaUnion + */ +struct NfaUnion +{ + /* Construct with only a term. */ + NfaUnion() : roundsList(0) { } + ~NfaUnion(); + + /* Tree traversal. */ + FsmRes walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + /* Node data. 
*/ + TermVect terms; + NfaRoundVect *roundsList; +}; + + +/* + * Term + */ +struct Term +{ + enum Type { + ConcatType, + RightStartType, + RightFinishType, + LeftType, + FactorWithAugType + }; + + Term( Term *term, FactorWithAug *factorWithAug ) : + term(term), factorWithAug(factorWithAug), type(ConcatType) { } + + Term( Term *term, FactorWithAug *factorWithAug, Type type ) : + term(term), factorWithAug(factorWithAug), type(type) { } + + Term( Action *action1, Action *action2, Action *action3, + Term *term, FactorWithAug *factorWithAug, + FactorWithAug *factorWithAug2, Type type ) + : + action1(action1), action2(action2), action3(action3), + term(term), factorWithAug(factorWithAug), + factorWithAug2(factorWithAug2), type(type) + { } + + Term( FactorWithAug *factorWithAug ) : + term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { } + + ~Term(); + + FsmRes walk( ParseData *pd, bool lastInSeq = true ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + Action *action1; + Action *action2; + Action *action3; + + Term *term; + FactorWithAug *factorWithAug; + FactorWithAug *factorWithAug2; + Type type; + + /* Priority descriptor for RightFinish type. */ + PriorDesc priorDescs[2]; +}; + + +/* Third level of precedence. Augmenting nodes with actions and priorities. */ +struct FactorWithAug +{ + FactorWithAug( FactorWithRep *factorWithRep ) + : + priorDescs(0), + factorWithRep(factorWithRep) + {} + + ~FactorWithAug(); + + /* Tree traversal. */ + FsmRes walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd ); + void assignPriorities( FsmAp *graph, int *priorOrd ); + + void assignConditions( FsmAp *graph ); + + /* Actions and priorities assigned to the factor node. 
*/ + Vector<ParserAction> actions; + Vector<PriorityAug> priorityAugs; + PriorDesc *priorDescs; + std::vector<Label> labels; + Vector<EpsilonLink> epsilonLinks; + Vector<ConditionTest> conditions; + + FactorWithRep *factorWithRep; +}; + +/* Fourth level of precedence. Trailing unary operators. Provide Kleene star, + * optional and plus. */ +struct FactorWithRep +{ + enum Type { + StarType, + StarStarType, + OptionalType, + PlusType, + ExactType, + MaxType, + MinType, + RangeType, + FactorWithNegType + }; + + /* Construct a repetition node wrapping another FactorWithRep. */ + FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep, + int lowerRep, int upperRep, Type type ) + : + loc(loc), repId(0), factorWithRep(factorWithRep), + factorWithNeg(0), lowerRep(lowerRep), + upperRep(upperRep), type(type) + {} + + /* Construct a pass-through node holding only a FactorWithNeg. The members + * this variant does not use are zeroed so that none is left holding an + * indeterminate value. */ + FactorWithRep( FactorWithNeg *factorWithNeg ) + : + repId(0), factorWithRep(0), + factorWithNeg(factorWithNeg), lowerRep(0), + upperRep(0), type(FactorWithNegType) + {} + + ~FactorWithRep(); + + /* Tree traversal. */ + FsmRes walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + InputLoc loc; + long long repId; + FactorWithRep *factorWithRep; + FactorWithNeg *factorWithNeg; + int lowerRep, upperRep; + Type type; + + /* Priority descriptor for StarStar type. */ + PriorDesc priorDescs[4]; +}; + +/* Fifth level of precedence. Provides Negation. */ +struct FactorWithNeg +{ + enum Type { + NegateType, + CharNegateType, + FactorType + }; + + FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) : + loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { } + + FactorWithNeg( Factor *factor ) : + factorWithNeg(0), factor(factor), type(FactorType) { } + + ~FactorWithNeg(); + + /* Tree traversal. */ + FsmRes walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + InputLoc loc; + FactorWithNeg *factorWithNeg; + Factor *factor; + Type type; +}; + +/* + * Factor + */ +struct Factor +{ + /* Language elements a factor node can be. 
*/ + enum Type { + LiteralType, + RangeType, + OrExprType, + RegExprType, + ReferenceType, + ParenType, + LongestMatchType, + NfaRep, + NfaWrap, + CondStar, + CondPlus + }; + + enum NfaRepeatMode { + NfaLegacy = 1, + NfaGreedy, + NfaLazy + }; + + /* Construct with a literal fsm. */ + Factor( Literal *literal ) : + literal(literal), type(LiteralType) { } + + /* Construct with a range. */ + Factor( Range *range ) : + range(range), type(RangeType) { } + + /* Construct with the or part of a regular expression. */ + Factor( ReItem *reItem ) : + reItem(reItem), type(OrExprType) { } + + /* Construct with a regular expression. */ + Factor( RegExpr *regExpr ) : + regExpr(regExpr), type(RegExprType) { } + + /* Construct with a reference to a var def. */ + Factor( const InputLoc &loc, VarDef *varDef ) : + loc(loc), varDef(varDef), type(ReferenceType) {} + + /* Construct with a parenthesized join. */ + Factor( Join *join ) : + join(join), type(ParenType) {} + + /* Construct with a longest match operator. */ + Factor( LongestMatch *longestMatch ) : + longestMatch(longestMatch), type(LongestMatchType) {} + + Factor( const InputLoc &loc, long long repId, Expression *expression, + Action *action1, Action *action2, Action *action3, + Action *action4, Action *action5, Action *action6, Type type ) + : + loc(loc), repId(repId), expression(expression), + action1(action1), action2(action2), action3(action3), + action4(action4), action5(action5), action6(action6), + type(type) + {} + + /* Cleanup. */ + ~Factor(); + + /* Tree traversal. 
*/ + FsmRes walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + InputLoc loc; + Literal *literal; + Range *range; + ReItem *reItem; + RegExpr *regExpr; + VarDef *varDef; + Join *join; + LongestMatch *longestMatch; + int lower, upper; + long repId; + Expression *expression; + Action *action1; + Action *action2; + Action *action3; + Action *action4; + Action *action5; + Action *action6; + PriorDesc priorDescs[4]; + NfaRepeatMode mode; + + Type type; +}; + +/* A range machine. Only ever composed of two literals. */ +struct Range +{ + Range( Literal *lowerLit, Literal *upperLit, bool caseIndep ) + : lowerLit(lowerLit), upperLit(upperLit), caseIndep(caseIndep) { } + + ~Range(); + FsmAp *walk( ParseData *pd ); + + Literal *lowerLit; + Literal *upperLit; + bool caseIndep; +}; + +/* Some literal machine. Can be a number or literal string. */ +struct Literal +{ + enum LiteralType { Number, LitString, HexString }; + + Literal( const InputLoc &loc, bool neg, const char *_data, int len, LiteralType type ) + : loc(loc), neg(neg), type(type) + { + data.append( _data, len ); + } + + FsmAp *walk( ParseData *pd ); + + InputLoc loc; + bool neg; + Vector<char> data; + LiteralType type; +}; + +/* Regular expression. */ +struct RegExpr +{ + enum RegExpType { RecurseItem, Empty }; + + /* Constructors. */ + RegExpr() : + type(Empty), caseInsensitive(false) { } + RegExpr(RegExpr *regExpr, ReItem *item) : + regExpr(regExpr), item(item), + type(RecurseItem), caseInsensitive(false) { } + + ~RegExpr(); + FsmAp *walk( ParseData *pd, RegExpr *rootRegex ); + + RegExpr *regExpr; + ReItem *item; + RegExpType type; + bool caseInsensitive; +}; + +/* An item in a regular expression. 
*/ +struct ReItem +{ + enum ReItemType { Data, Dot, OrBlock, NegOrBlock }; + + ReItem( const InputLoc &loc, const char *_data, int len ) + : + loc(loc), star(false), type(Data) + { + data.append( _data, len ); + } + + ReItem( const InputLoc &loc, ReItemType type ) + : loc(loc), star(false), type(type) { } + + ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type ) + : loc(loc), orBlock(orBlock), star(false), type(type) { } + + ~ReItem(); + FsmRes walk( ParseData *pd, RegExpr *rootRegex ); + + InputLoc loc; + Vector<char> data; + ReOrBlock *orBlock; + bool star; + ReItemType type; +}; + +/* An or block item. */ +struct ReOrBlock +{ + enum ReOrBlockType { RecurseItem, Empty }; + + /* Constructors. */ + ReOrBlock() + : type(Empty) { } + ReOrBlock(ReOrBlock *orBlock, ReOrItem *item) + : orBlock(orBlock), item(item), type(RecurseItem) { } + + ~ReOrBlock(); + FsmAp *walk( ParseData *pd, RegExpr *rootRegex ); + + ReOrBlock *orBlock; + ReOrItem *item; + ReOrBlockType type; +}; + +/* An item in an or block. 
*/ +struct ReOrItem +{ + enum ReOrItemType { Data, Range }; + + ReOrItem( const InputLoc &loc, const char *_data, int len ) + : + loc(loc), type(Data) + { + data.append( _data, len ); + } + + ReOrItem( const InputLoc &loc, char lower, char upper ) + : loc(loc), lower(lower), upper(upper), type(Range) { } + + FsmAp *walk( ParseData *pd, RegExpr *rootRegex ); + + InputLoc loc; + Vector<char> data; + char lower; + char upper; + ReOrItemType type; +}; + + +#endif diff --git a/ragel/pcheck.h b/ragel/pcheck.h new file mode 100644 index 00000000..adc011b3 --- /dev/null +++ b/ragel/pcheck.h @@ -0,0 +1,49 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _PCHECK_H +#define _PCHECK_H + +class ParamCheck +{ +public: + ParamCheck( const char *paramSpec, int argc, const char **argv); + + bool check(); + + const char *paramArg; /* The argument to the parameter. 
*/ + char parameter; /* The parameter matched. */ + enum { match, invalid, noparam } state; + + const char *argOffset; /* If we are reading params inside an + * arg this points to the offset. */ + + const char *curArg; /* Pointer to the current arg. */ + int iCurArg; /* Index to the current arg. */ + +private: + const char *paramSpec; /* Parameter spec supplied by the coder. */ + int argc; /* Arguement data from the command line. */ + const char **argv; +}; + +#endif diff --git a/ragel/ragel-config.cmake.in b/ragel/ragel-config.cmake.in new file mode 100644 index 00000000..8de5d2cb --- /dev/null +++ b/ragel/ragel-config.cmake.in @@ -0,0 +1,3 @@ +# @_PACKAGE_NAME@-config.cmake Generated from ragel-config.cmake.in by cmake + +include("${CMAKE_CURRENT_LIST_DIR}/@_PACKAGE_NAME@-targets.cmake") diff --git a/ragel/ragel.h b/ragel/ragel.h new file mode 100644 index 00000000..c3fd6f22 --- /dev/null +++ b/ragel/ragel.h @@ -0,0 +1,108 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _RAGEL_H +#define _RAGEL_H + +#include <stdio.h> +#include <iostream> +#include <fstream> +#include <string> +#include "vector.h" +#include "config.h" +#include "common.h" + +#define PROGNAME "ragel" + +#define MAIN_MACHINE "main" + +/* Target output style. */ +enum CodeStyle +{ + GenBinaryLoop, + GenBinaryExp, + GenFlatLoop, + GenFlatExp, + GenGotoLoop, + GenGotoExp, + GenSwitchLoop, + GenSwitchExp, + GenIpGoto +}; + +/* To what degree are machine minimized. */ +enum MinimizeLevel { + #ifdef TO_UPGRADE_CONDS + MinimizeApprox, + #endif + #ifdef TO_UPGRADE_CONDS + MinimizeStable, + #endif + MinimizePartition1, + MinimizePartition2 +}; + +enum MinimizeOpt { + MinimizeNone, + MinimizeEnd, + MinimizeMostOps, + MinimizeEveryOp +}; + +/* Target implementation */ +enum RubyImplEnum +{ + MRI, + Rubinius +}; + +/* Error reporting format. 
*/ +enum ErrorFormat { + ErrorFormatGNU, + ErrorFormatMSVC, +}; + +extern ErrorFormat errorFormat; + + +struct colm_location; + +InputLoc makeInputLoc( const char *fileName, int line = 0, int col = 0 ); +InputLoc makeInputLoc( const struct colm_location *loc ); +std::ostream &operator<<( std::ostream &out, const InputLoc &loc ); + +void xmlEscapeHost( std::ostream &out, const char *data, long len ); + + +using std::endl; + +extern const char mainMachine[]; + +struct AbortCompile +{ + AbortCompile( int code ) + : code(code) {} + + int code; +}; + +#endif diff --git a/ragel/ragel.lm b/ragel/ragel.lm new file mode 100644 index 00000000..c9f6a3b5 --- /dev/null +++ b/ragel/ragel.lm @@ -0,0 +1,1021 @@ + +global GblActionParams: bool = false +global GblMachineMap: map<str, machine> = new map<str, machine>() +global GblCurMachine: machine +global GblTargetMachine: str = "" +global GblSearchMachine: str = "" +global GblWantSection: bool = false +global GblIncludeDepth: int = 0 +global GblImport: bool = false +global GblFileName: str = "" +global GblIncludePaths: list<str> = new list<str>() + +struct saved_globals + FileName: str + TargetMachine: str + SearchMachine: str + WantSection: bool + IncludeDepth: int + Import: bool +end + +global GblSavedGlobals: list<saved_globals> = new list<saved_globals>() + +void saveGlobals() +{ + new SG: saved_globals() + + SG->FileName = GblFileName + SG->TargetMachine = GblTargetMachine + SG->SearchMachine = GblSearchMachine + SG->WantSection = GblWantSection + SG->Import = GblImport + SG->IncludeDepth = GblIncludeDepth + + GblSavedGlobals->push_tail( SG ) +} + +void restoreGlobals() +{ + SG: saved_globals = GblSavedGlobals->pop_tail() + + GblFileName = SG->FileName + GblTargetMachine = SG->TargetMachine + GblSearchMachine = SG->SearchMachine + GblWantSection = SG->WantSection + GblImport = SG->Import + GblIncludeDepth = SG->IncludeDepth +} + +struct include_history_item + FileName: str + SectionName: str +end + +bool isDuplicateInclude( 
From: machine, FileName: str, SectionName: str ) +{ + for Item: include_history_item in From->IncludeHistory { + if Item->FileName == FileName && Item->SectionName == SectionName { + return true + } + } + return false +} + +void addIncludeItem( From: machine, FileName: str, SectionName: str ) +{ + new Item: include_history_item() + Item->FileName = FileName + Item->SectionName = SectionName + From->IncludeHistory->push_tail( Item ) +} + +struct machine + Name: str + ActionParams: map<str, str> + IncludeHistory: list<include_history_item> +end + + +rl ident + /( alpha | '_' ) ( alpha | digit | '_' )*/ + +rl number + / digit+ / + +rl hex_number + / '0x' [0-9a-fA-F]+ / + +rl hex_char + / '0x' [0-9a-fA-F]{2} / + +rl NL / '\n' / + +rl c_comment + / '/*' ( any | NL )* :>> '*/' / + +rl cpp_comment + / '//' [^\n]* NL / + +rl ruby_comment + / '#' [^\n]* NL / + +rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + +rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + +rl host_re_literal + / '/' ([^/\\] | NL | '\\' (any | NL))* '/' / + +# +# Consuming ragel defintions without parsing. Used for included sections we +# don't want and for import (TODO). +# +namespace consume + lex + token h_word / [a-zA-Z_][a-zA-Z0-9_]* / + + token h_open /'{'/ + token h_close /'}'/ + + token h_number /digit+/ + token h_hex_number /'0x' [0-9a-fA-F]+/ + + token h_comment + / c_comment | cpp_comment / + + token h_string + / s_literal | d_literal / + + token h_whitespace + / ( [ \t] | NL )+ / + + token h_any / any / + end + + def host_tok + [h_word] + | [h_number] + | [h_hex_number] + | [h_comment] + | [h_string] + | [h_whitespace] + | [h_open host_tok* h_close] + | [h_any] + + lex + ignore /[\t\n ]+/ + ignore /'#' any* :> '\n'/ + + literal `}%% + + token word / [a-zA-Z_][a-zA-Z0-9_]* / + token uint / number / + token hex / hex_number / + + token string / + '"' ( [^"\\] | '\\' any )* '"' 'i'? | + "'" ( [^'\\] | '\\' any )* "'" 'i'? | + "[" ( [^\]\\] | '\\' any )* "]" 'i'? 
#| + #"/" ( [^\/\\] | '\\' any )* "/" 'i'? + / + + + token open /'{'/ -ni + token close ni- /'}'/ + token c_any / any / + end + + + # Garbling up a machine, no interpretation + def tok + [word] + | [uint] + | [hex] + | [string] + | [open host_tok* h_close] + | [c_any] +end + +# State reference. +namespace state_ref + lex + ignore /[\t\n ]+/ + literal `:: `; `) + token word /[a-zA-Z_][a-zA-Z0-9_]*/ + end + + def state_ref + [opt_name_sep state_ref_names] :Ref + + def opt_name_sep + [`::] :ColonColon + | [] :Empty + + # List of names separated by :: + def state_ref_names + [state_ref_names `:: word] :Rec + | [word] :Base +end + +namespace inline + + def inline_expr + [expr_item_list] :List + + def expr_item_list + [expr_item_list expr_item] :Rec + | [] :Empty + + def expr_item + [expr_any] :ExprAny + | [expr_symbol] :ExprSymbol + | [expr_interpret] :ExprInterpret + + def expr_any + [whitespace] :WS + | [comment] :Comment + | [string] :String + | [number] :Number + | [hex_number] :Hex + | [ident] :Ident + | [c_any] :Any + + def expr_symbol + [`,] :Comma | [`(] :Open | [`)] :Close | [`*] :Star | [`::] :DoubleColon + + def expr_interpret + [`fpc] :Fpc + | [`fc] :Fc + | [`fcurs] :Fcurs + | [`ftargs] :Ftargs + | [`fentry `( state_ref::state_ref state_ref::`)] :Fentry + | [var_ref] :VarRef + + def inline_block + [block_item_list] :List + + def block_item_list + [block_item block_item_list] :Rec + | [] :Base + + def block_item + [expr_any] :ExprAny + | [block_symbol] :BlockSymbol + | [block_interpret] :BlockInterpret + | [`{ inline_block `}] :RecBlock + + def block_symbol + [`,] :B1 | [`;] :B2 | [`(] :B3 | [`)] :B4 | [`*] :B5 | [`::] :B6 + + def block_interpret + [expr_interpret] :ExprInterpret + | [`fhold whitespace? `;] :Fhold + | [`fgoto whitespace? `* inline_expr `;] :FgotoExpr + | [`fnext whitespace? `* inline_expr `;] :FnextExpr + | [`fcall whitespace? `* inline_expr `;] :FcallExpr + | [`fncall whitespace? 
`* inline_expr `;] :FncallExpr + | [`fexec inline_expr `;] :Fexec + | [`fgoto state_ref::state_ref state_ref::`;] :FgotoSr + | [`fnext state_ref::state_ref state_ref::`;] :FnextSr + | [`fcall state_ref::state_ref state_ref::`;] :FcallSr + | [`fncall state_ref::state_ref state_ref::`;] :FncallSr + | [`fret `;] :Fret + | [`fnret `;] :Fnret + | [`fbreak `;] :Fbreak + | [`fnbreak `;] :Fnbreak +end + +namespace ragel + lex + literal `}%% -ni + + ignore /[\t\n ]+/ + ignore /'#' any* :> '\n'/ + + literal `^ `| `- `, `: `! `? `. + literal `( `) `{ -ni ni- `} `* `& `+ + + literal `-- `:> `:>> `<: `-> `** + + literal `|* `*| `=> + + literal `@ `> `< `% `$ + literal `from `to `eof `lerr `err + literal `when `inwhen `outwhen `>? `$? `%? + + literal `:= `|= `= `; `.. `../i `:: + + literal `>~ `$~ `%~ `<~ `@~ `<>~ + literal `>* `$* `%* `<* `@* `<>* + literal `>/ `$/ `%/ `</ `@/ `<>/ + literal `>! `$! `%! `<! `@! `<>! + literal `>^ `$^ `%^ `<^ `@^ `<>^ + literal `<> + + literal `%%--{ -ni `%%++{ -ni + + token include_tok + /'include'/ + { + # Take off the include token. + input->pull( match_length ) + + # Parse the include details, up to the the ';' and stop there. + parse_stop Spec: include_spec(input)[] + + Fn: str + Machine: str + if Spec.string + Fn = $Spec.string + if Spec.word + Machine = $Spec.word + + Stream: stream = ragelInclude( Fn, Machine ) + + if Stream { + input->push( "}--%%" ) + input->push_stream( Stream ) + input->push( "%%--{" ) + } + } + + token import_tok + /'import'/ + { + # Take off the include token. + input->pull( match_length ) + + # Parse the import details, up to the the ';' and stop there. 
+ parse_stop Spec: import_spec(input)[] + + Fn: str + if Spec.string + Fn = $Spec.string + + Stream: stream = ragelImport( Fn ) + + if Stream { + input->push( "}++%%" ) + input->push_stream( Stream ) + input->push( "%%++{" ) + } + } + + + literal `machine `action `variable `alphtype + `access `write `getkey `export `prepush + `postpop `nfaprepush `nfapostpop + + literal `:nfa `:nfa_greedy `:nfa_lazy `:nfa_wrap + `:nfa_wrap_greedy `:nfa_wrap_lazy + `:cond `:condplus `:condstar `): + + token string / + '"' ( [^"\\] | '\\' any )* '"' 'i'? | + "'" ( [^'\\] | '\\' any )* "'" 'i'? + / + + token lex_regex_open /'/'/ -ni + token lex_sqopen_pos /'['/ -ni + token lex_sqopen_neg /'[^'/ -ni + + token word / [a-zA-Z_][a-zA-Z0-9_]* / + token uint / number / + token hex / hex_number / + end + + def include_spec + [word `;] + | [string `;] + | [word string `;] + + def import_spec + [string `;] + + lex + token re_dot / '.' / + token re_star / '*' / + token re_char / ^( '\\' | '.' | '*' | '[' | '/' ) | '\\' . any / + token re_close / '/' 'i'? / + token re_sqopen_pos /'['/ + token re_sqopen_neg /'[^'/ + end + + lex + token re_or_dash / '-' / + token re_or_char / ^( '\\' | '-' | ']' ) | '\\' . any / + token re_or_sqclose / ']' / + end + + # Not cannot start with '{', terminated by ';', rewritten into { inline_expr } + token _inline_expr_reparse + /[^{;] [^;]* ';'/ { + R: str = input->pull( match_length - 1 ) + input->pull( 1 ) + input->push( "}" ) + input->push( R ) + input->push( "{" ) + } + + token variable_name /ident/ + + # This region is for deciding if we want to parse a ragel section, or if we + # want to consume it without interpreting. Consuming is for included + # sections we don't want and all sections in an imported file. + lex + token ign_select /''/ + { + if GblWantSection + input->push( make_token( typeid<ign_want>, '' ) ) + else + input->push( make_token( typeid<ign_ignore>, '' ) ) + } + + token ign_want // + token ign_ignore // + end + + # + # Machine name word. 
We inspect it to determine if we are interested in the + # section. + # + lex + token mn_word / [a-zA-Z_][a-zA-Z0-9_]* / + { + S: str = input->pull(match_length) + IgnWord: mn_word = make_token( typeid<mn_word>, S ) + input->push( IgnWord ) + + if ( GblImport ) + GblWantSection = false + else if ( GblSearchMachine != "" ) { + if ( S != GblSearchMachine ) + GblWantSection = false + else + GblWantSection = true + } + else { + GblWantSection = true + } + + Name: str = S #$lhs.mn_word + if GblTargetMachine != "" + Name = GblTargetMachine + + Machine: machine = GblMachineMap->find( Name ) + + if !Machine + { + Machine = new machine() + Machine->Name = Name + Machine->ActionParams = new map<str, str>() + Machine->IncludeHistory = new list<include_history_item>() + GblMachineMap->insert( Machine->Name, Machine ) + } + + GblCurMachine = Machine + + # print "want section: [GblWantSection] + } + end + + + def inline_expr_reparse + [_inline_expr_reparse] :Reparse + | [action_expr] :ActionExpr + + def join + [join `, expression] :Rec + | [expression] :Base + + def expression + [expr_left expression_op_list] :Expression + + def expression_op_list + [expression_op expression_op_list] :Op + | [] :Empty + + def expression_op + [`| term] :Or + | [`& term] :And + | [`- term] :Sub + | [`-- term] :Ssub + + def expr_left + [term] :Term + + def term + [term_left term_op_list_short] :Term + + def term_left + [factor_label] :FactorLabel + + # This list is done manually to get shortest match. + def term_op_list_short + [] :Empty + | [term_op term_op_list_short] :Terms + + def term_op + [factor_label] :None + | [`. 
factor_label] :Dot + | [`:> factor_label] :ColonLt + | [`:>> factor_label] :ColonLtLt + | [`<: factor_label] :GtColon + + def factor_label + [word `: factor_label] :Label + | [factor_ep] :Ep + + def factor_ep + [factor_aug `-> epsilon_target] :Epsilon + | [factor_aug] :Base + + def epsilon_target + [epsilon_target `:: word] :Rec + | [word] :Base + + def action_expr + [`{ CInlineExpr: inline::inline_expr inline::`}] :ActionExpr + + def action_block + [`{ CInlineBlock: inline::inline_block inline::`}] :ActionBlock + + def action_arg_list + [action_arg_list `, action_ref] :Rec + | [action_ref] :Base + + def opt_action_arg_list + [action_arg_list] :List + | [] :Empty + + def named_action_ref + [word] :Plain + { + if ( GblCurMachine->ActionParams->find( $lhs.word ) ) + reject + } + | [word `( opt_action_arg_list `)] :Args + { + if ( ! GblCurMachine->ActionParams->find( $lhs.word ) ) + reject + } + + def action_ref + [named_action_ref] :NamedRef + | [`( named_action_ref `)] :ParenNamed + | [action_block] :Block + + def priority_name + [word] :Word + + def error_name + [word] :Word + + def priority_aug + [uint] :NoSign + | [`+ uint] :Plus + | [`- uint] :Minus + + def aug_base + [`@] :Finish | [`>] :Enter | [`%] :Leave | [`$] :All + + def aug_cond + [`>?] :Start1 | [`$?] :All1 | [`%?] 
:Leave1 + | [`> `when] :Start2 | [`$ `when] :All2 | [`% `when] :Leave2 + | [`inwhen] :Start3 | [`when] :All3 | [`outwhen] :Leave3 + + def aug_to_state + [`>~] :Start1 | [`<~] :NotStart1 | [`$~] :All1 + | [`%~] :Final1 | [`@~] :NotFinal1 | [`<>~] :Middle1 + | [`> `to] :Start2 | [`< `to] :NotStart2 | [`$ `to] :All2 + | [`% `to] :Final2 | [`@ `to] :NotFinal2 | [`<> `to] :Middle2 + + def aug_from_state + [`>*] :Start1 | [`<*] :NotStart1 | [`$*] :All1 + | [`%*] :Final1 | [`@*] :NotFinal1 | [`<>*] :Middle1 + | [`> `from] :Start2 | [`< `from] :NotStart2 | [`$ `from] :All2 + | [`% `from] :Final2 | [`@ `from] :NotFinal2 | [`<> `from] :Middle2 + + def aug_eof + [`>/] :Start1 | [`</] :NotStart1 | [`$/] :All1 + | [`%/] :Final1 | [`@/] :NotFinal1 | [`<>/] :Middle1 + | [`> `eof] :Start2 | [`< `eof] :NotStart2 | [`$ `eof] :All2 + | [`% `eof] :Final2 | [`@ `eof] :NotFinal2 | [`<> `eof] :Middle2 + + def aug_gbl_error + [`>!] :Start1 | [`<!] :NotStart1 | [`$!] :All1 + | [`%!] :Final1 | [`@!] :NotFinal1 | [`<>!] :Middle1 + | [`> `err] :Start2 | [`< `err] :NotStart2 | [`$ `err] :All2 + | [`% `err] :Final2 | [`@ `err] :NotFinal2 | [`<> `err] :Middle2 + + def aug_local_error + [`>^] :Start1 | [`<^] :NotStart1 | [`$^] :All1 + | [`%^] :Final1 | [`@^] :NotFinal1 | [`<>^] :Middle1 + | [`> `lerr] :Start2 | [`< `lerr] :NotStart2 | [`$ `lerr] :All2 + | [`% `lerr] :Final2 | [`@ `lerr] :NotFinal2 | [`<> `lerr] :Middle2 + + def factor_aug + [factor_aug aug_base action_ref] :ActionRef + | [factor_aug aug_base priority_aug] :PriorEmbed + | [factor_aug aug_base `( priority_name `, priority_aug `)] :NamedPriorEmbed + | [factor_aug aug_cond action_ref] :CondEmbed + | [factor_aug aug_cond `! 
action_ref] :NegCondEmbed + | [factor_aug aug_to_state action_ref] :ToStateAction + | [factor_aug aug_from_state action_ref] :FromStateAction + | [factor_aug aug_eof action_ref] :EofAction + | [factor_aug aug_gbl_error action_ref] :GblErrorAction + | [factor_aug aug_local_error action_ref] :LocalErrorDef + | [factor_aug aug_local_error `( error_name `, action_ref `)] :LocalErrorName + | [factor_rep] :Base + + def factor_rep + [factor_neg factor_rep_op_list] :Op + + def factor_rep_op_list + [factor_rep_op factor_rep_op_list] :Rec + | [] :Base + + def factor_rep_op + [`*] :Star + | [`**] :StarStar + | [`?] :Optional + | [`+] :Plus + | [`{ factor_rep_num `}] :ExactRep + | [`{ `, factor_rep_num `}] :MaxRep + | [`{ factor_rep_num `, `}] :MinRep + | [`{ LowRep: factor_rep_num `, HighRep: factor_rep_num `}] :RangeRep + + def factor_rep_num + [uint] :RepNum + + def factor_neg + [`! factor_neg] :Bang + | [`^ factor_neg] :Caret + | [factor] :Base + + def opt_max_arg + [`, action_ref] :Action + | [] :Empty + + def nfastar + [`:nfa] :Default + | [`:nfa_lazy] :Lazy + | [`:nfa_greedy] :Greedy + + def nfawrap + [`:nfa_wrap] :Default + | [`:nfa_wrap_lazy] :Lazy + | [`:nfa_wrap_greedy] :Greedy + + def colon_cond + [`:cond] :Cond + | [`:condstar] :CondStar + | [`:condplus] :CondPlus + + def factor + [alphabet_num] :AlphabetNum + | [word] :Word + | [string] :String + | [lex_sqopen_pos reg_or_data re_or_sqclose] :PosOrBlock + | [lex_sqopen_neg reg_or_data re_or_sqclose] :NegOrBlock + | [lex_regex_open regex re_close] :Regex + | [RL1: range_lit `.. 
RL2: range_lit] :Range + | [RL1: range_lit `../i RL2: range_lit] :RangeIndep + | [nfastar `( expression `, + Push: action_ref `, Pop: action_ref `, Init: action_ref `, Stay: action_ref `, + Repeat: action_ref `, Exit: action_ref `):] :Nfa + | [nfawrap `( expression `, + Push: action_ref `, Pop: action_ref `, Init: action_ref `, Stay: action_ref `, + Exit: action_ref `):] :NfaWrap + | [colon_cond `( expression `, + Init: action_ref `, Inc: action_ref `, Min: action_ref OptMax: opt_max_arg `):] :Cond + | [`( join `)] :Join + + def regex + [reg_item_rep_list] :List + + def reg_item_rep_list + [reg_item_rep_list reg_item_rep] :Rec + | [] :Base + + def reg_item_rep + [reg_item re_star] :Star + | [reg_item] :Base + + def reg_item + [re_sqopen_pos reg_or_data re_or_sqclose] :PosOrBlock + | [re_sqopen_neg reg_or_data re_or_sqclose] :NegOrBlock + | [re_dot] :Dot + | [re_char] :Char + + def reg_or_data + [reg_or_data reg_or_char] :Data + | [] :Base + + def reg_or_char + [re_or_char] :Char + | [Low: re_or_char re_or_dash High: re_or_char] :Range + + def range_lit + [string] :String + | [alphabet_num] :AN + + def alphabet_num + [uint] :Uint + | [`- uint] :Neg + | [hex] :Hex + + def lm_act + [`=> action_ref] :ActionRef + | [action_block] :ActionBlock + + def opt_lm_act + [lm_act] :Act + | [] :Empty + + def lm_stmt + [join opt_lm_act `;] :LmStmt commit + | [assignment] :Assignment + | [action_spec] :ActionSpec + + def lm_stmt_list + [lm_stmt_list lm_stmt] :Rec + | [lm_stmt] :Base + + def lm + [join] :Join + | [`|* lm_stmt_list `*|] :Lm + | [`:nfa `|* lm_stmt_list `*|] :LmNfa + + # + # Actions + # + def action_param + [word] :Word + + def action_param_list + [action_param_list `, action_param] :Rec + | [action_param] :Base + + def opt_action_param_list + [action_param_list] :List + | [] :Empty + + def action_params + [`( opt_action_param_list `)] :List + { + GblActionParams = true + } + + def action_spec + [`action word action_params action_block] :ActionSpecParams commit + { + # 
Track that this action has params so we can parse appropriately + # when reducing. + GblCurMachine->ActionParams->insert( $lhs.word, $lhs.word ) + + # Reset after parsing the block. + GblActionParams = false + } + | [`action word action_block] :ActionSpec commit + { + GblActionParams = false + } + + def def_name + [word] :Word + + # + # Machine Instantiations. + # + def assignment + [opt_export def_name `= join `;] :Assignment commit + + def instantiation + [opt_export def_name `:= lm `;] :Instantiation commit + + def nfa_expr + [nfa_expr `| term] :Union + | [term] :Base + + def nfa_round_spec + [Depth: uint `, Group: uint] :Spec + + def nfa_round_list + [nfa_round_list `, nfa_round_spec] :Recurse + | [nfa_round_spec] :Base + + def nfa_rounds + [`( nfa_round_list `)] :Rounds + + def nfa_union + [def_name `|= nfa_rounds nfa_expr `;] :NfaUnion commit + + def alphtype_type + [W1: word] :One + | [W1: word W2: word] :Two + + def opt_export + [`export] :Export + | [] :Base + + def write_arg + [word] :Word + + def machine_name + [`machine mn_word `;] :MachineName + + def open_inc + [`%%--{] :OpenInc + + def close_inc + [host::close_inc] :CloseInc + + def include_statement + [open_inc host::section* close_inc] :IncPost commit + + def open_imp + [`%%++{] :OpenImp + + def close_imp + [host::close_imp] :CloseImp + + def import_statement + [open_imp host::section* close_imp] :ImpPost commit + + def statement + [assignment] :Assignment + | [instantiation] :Instantiation + | [nfa_union] :NfaUnion + | [action_spec] :ActionSpec + | [`prepush action_block] :PrePush commit + | [`postpop action_block] :PostPop commit + | [`variable variable_name inline_expr_reparse] :Variable commit + | [`alphtype alphtype_type `;] :AlphType commit + | [`access inline_expr_reparse] :Access commit + | [`write Cmd: word ArgList: write_arg* `;] :Write commit + | [`getkey inline_expr_reparse] :GetKey commit + | [import_statement] :Import commit + | [include_statement] :Include commit + | [`nfaprepush 
action_block] :NfaPrePush commit + | [`nfapostpop action_block] :NfaPostPop commit + + def opt_machine_name + [machine_name] :MachineName + | [] :Empty + + def ragel_start + [opt_machine_name ign_want statement*] + | [opt_machine_name ign_ignore consume::tok*] +end + +str prepareLitString( Str: str ) +{ + # TODO: escape sequences + return suffix( prefix( Str, Str.length - 1 ), 1 ) +} + +bool isAbsolutePath( Path: str ) +{ + # TODO: implement this + return false +} + +namespace path + lex + token slash /'/'/ + token chars /[^\/]+/ + end + + def path + [Abs: slash? DirList: dir* File: chars] + + def dir + [chars slash] + + dir *concat_dir( Dir1: dir*, Dir2: dir* ) + { + for D: dir* in Dir1 { + if match D [] { + D = Dir2 + break + } + } + return Dir1 + } +end + +namespace host + def section + [`%%{ ragel::opt_machine_name ragel::ign_want ragel::statement* ragel::`}%%] :MultiLine + | [`%%{ ragel::opt_machine_name ragel::ign_ignore consume::tok* consume::`}%%] :Consume + | [tok] :Token +end + +def start + [SectionList: host::section*] + +list<str> makeIncludePathChecks( CurFileName: str, IncFileName: str ) +{ + new L: list<str>() + + parse CurPath: path::path[ CurFileName ] + parse IncPath: path::path[ IncFileName ] + + if match IncPath.Abs [slash] { + # Included file is absolute + L->push_tail( IncFileName ) + } + else { + # First add the location of current file + if match CurPath.DirList [] + L->push_tail( IncFileName ) + else { + # Current file path + Include Path + Include File + cons NewPath: path::path [ + CurPath.Abs + path::concat_dir( CurPath.DirList, IncPath.DirList ) + IncPath.File + ] + + L->push_tail( $NewPath ) + } + + # Next add include file locations. 
+ for Path: str in GblIncludePaths { + parse IncPath: path::path[ CurFileName ] + L->push_tail( "[Path]/[IncFileName]" ) + } + } + return L +} + +stream ragelInclude( IncFileName: str, Machine: str ) +{ + if IncFileName + IncFileName = prepareLitString( IncFileName ) + + Checks: list<str> + if IncFileName + Checks = makeIncludePathChecks( GblFileName, IncFileName ) + else { + Checks = new list<str>() + Checks->push_tail( GblFileName ) + + } + + Stream: stream + OpenedName: str + for P: str in Checks { + Stream = open( P, "r" ) + if Stream { + OpenedName = P + break + } + } + + if !Stream { + print "error: could not open [IncFileName] + return nil + } + + # Default to the current machine if none is specified. + if !Machine + Machine = GblCurMachine->Name + + if isDuplicateInclude( GblCurMachine, IncFileName, Machine ) + return nil + + addIncludeItem( GblCurMachine, IncFileName, Machine ) + + saveGlobals() + + GblIncludeDepth = GblIncludeDepth + 1 + GblFileName = OpenedName + + # Set up the search and target machine names. Search is the machine we want + # to include and target is the machine we include to. 
+ GblSearchMachine = Machine + GblTargetMachine = GblCurMachine->Name + + return Stream +} + +stream ragelImport( IncFileName: str ) +{ + if IncFileName + IncFileName = prepareLitString( IncFileName ) + + Checks: list<str> + if IncFileName + Checks = makeIncludePathChecks( GblFileName, IncFileName ) + else { + Checks = new list<str>() + Checks->push_tail( GblFileName ) + } + + Stream: stream + OpenedName: str + for P: str in Checks { + Stream = open( P, "r" ) + if Stream { + OpenedName = P + break + } + } + + if !Stream { + print "error: could not open [IncFileName] + return nil + } + + saveGlobals() + + GblFileName = OpenedName + GblImport = true + + return Stream +} diff --git a/ragel/redfsm.cc b/ragel/redfsm.cc new file mode 100644 index 00000000..60e91b05 --- /dev/null +++ b/ragel/redfsm.cc @@ -0,0 +1,1191 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "redfsm.h" +#include "avlmap.h" +#include "mergesort.h" +#include "fsmgraph.h" +#include <iostream> +#include <sstream> +#include <ctime> + +using std::ostringstream; + +GenInlineItem::~GenInlineItem() +{ + if ( children != 0 ) { + children->empty(); + delete children; + } +} + +string GenAction::nameOrLoc() +{ + if ( name.empty() ) { + ostringstream ret; + ret << loc.line << ":" << loc.col; + return ret.str(); + } + else { + return name; + } +} + +RedFsmAp::RedFsmAp( FsmCtx *fsmCtx, int machineId ) +: + keyOps(fsmCtx->keyOps), + fsmCtx(fsmCtx), + machineId(machineId), + forcedErrorState(false), + nextActionId(0), + nextTransId(0), + nextCondId(0), + startState(0), + errState(0), + errTrans(0), + errCond(0), + firstFinState(0), + numFinStates(0), + bAnyToStateActions(false), + bAnyFromStateActions(false), + bAnyRegActions(false), + bAnyEofActions(false), + bAnyEofTrans(false), + bAnyEofActivity(false), + bAnyActionGotos(false), + bAnyActionCalls(false), + bAnyActionNcalls(false), + bAnyActionRets(false), + bAnyActionNrets(false), + bAnyActionByValControl(false), + bAnyRegActionRets(false), + bAnyRegActionByValControl(false), + bAnyRegNextStmt(false), + bAnyRegCurStateRef(false), + bAnyRegBreak(false), + bAnyRegNbreak(false), + bUsingAct(false), + bAnyNfaStates(false), + bAnyNfaPushPops(false), + bAnyNfaPushes(false), + bAnyNfaPops(false), + bAnyTransCondRefs(false), + bAnyNfaCondRefs(false), + nextClass(0), + classMap(0) +{ +} + +RedFsmAp::~RedFsmAp() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + delete[] st->transList; + if ( st->nfaTargs != 0 ) + delete st->nfaTargs; + if ( st->inConds != 0 ) + delete[] st->inConds; + if ( st->inCondTests != 0 ) + delete[] st->inCondTests; + } + + delete[] allStates; + if ( classMap != 0 ) + delete[] classMap; + + for ( TransApSet::Iter ti = transSet; ti.lte(); ti++ ) { + if ( ti->condSpace != 0 ) + delete[] ti->v.outConds; + } + + condSet.empty(); + transSet.empty(); +} + +/* Does the 
machine have any actions. */ +bool RedFsmAp::anyActions() +{ + return actionMap.length() > 0; +} + +void RedFsmAp::depthFirstOrdering( RedStateAp *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->onStateList ) + return; + + /* Doing depth first, put state on the list. */ + state->onStateList = true; + stateList.append( state ); + + /* At this point transitions should only be in ranges. */ + assert( state->outSingle.length() == 0 ); + assert( state->defTrans == 0 ); + + /* Recurse on everything ranges. */ + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + for ( int c = 0; c < rtel->value->numConds(); c++ ) { + RedCondPair *cond = rtel->value->outCond( c ); + if ( cond->targ != 0 ) + depthFirstOrdering( cond->targ ); + } + } + + if ( state->nfaTargs ) { + for ( RedNfaTargs::Iter s = *state->nfaTargs; s.lte(); s++ ) + depthFirstOrdering( s->state ); + } +} + +/* Ordering states by transition connections. */ +void RedFsmAp::depthFirstOrdering() +{ + /* Init on state list flags. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->onStateList = false; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + /* Add back to the state list from the start state and all other entry + * points. */ + if ( startState != 0 ) + depthFirstOrdering( startState ); + for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( *en ); + if ( forcedErrorState ) + depthFirstOrdering( errState ); + + /* Make sure we put everything back on. */ + assert( stateListLen == stateList.length() ); +} + +void RedFsmAp::breadthFirstAdd( RedStateAp *state ) +{ + if ( state->onStateList ) + return; + + state->onStateList = true; + stateList.append( state ); +} + +void RedFsmAp::breadthFirstOrdering() +{ + /* Init on state list flags. 
*/ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->onStateList = false; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + if ( startState != 0 ) + breadthFirstAdd( startState ); + + int depth = 0; + int nextLevel = stateList.length(); + int pos = 0; + + /* To implement breadth-first we traverse the current list (assuming a + * start state) and add children. */ + RedStateAp *cur = stateList.head; + while ( cur != 0 ) { + /* Recurse on everything ranges. */ + for ( RedTransList::Iter rtel = cur->outRange; rtel.lte(); rtel++ ) { + for ( int c = 0; c < rtel->value->numConds(); c++ ) { + RedCondPair *cond = rtel->value->outCond( c ); + if ( cond->targ != 0 ) + breadthFirstAdd( cond->targ ); + } + } + + if ( cur->nfaTargs ) { + for ( RedNfaTargs::Iter s = *cur->nfaTargs; s.lte(); s++ ) + breadthFirstAdd( s->state ); + } + + cur = cur->next; + pos += 1; + + if ( pos == nextLevel ) { + depth += 1; + nextLevel = stateList.length(); + } + } + + for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( *en ); + if ( forcedErrorState ) + depthFirstOrdering( errState ); + + assert( stateListLen == stateList.length() ); +} + +#ifdef SCORE_ORDERING +void RedFsmAp::readScores() +{ + /* + * Reads processed transitions logged by ASM codegen when LOG_TRANS is + * enabled. Process with: + * + * cat trans-log | sort -n -k 1 -k 2 -k 3 | uniq -c | sort -r -n -k1 -r > scores + */ + FILE *sfn = fopen( "scores", "r" ); + + scores = new long*[nextStateId]; + for ( int i = 0; i < nextStateId; i++ ) { + scores[i] = new long[256]; + memset( scores[i], 0, sizeof(long) * 256 ); + } + + long score, m, state, ch; + while ( true ) { + int n = fscanf( sfn, "%ld %ld %ld %ld\n", &score, &m, &state, &ch ); + if ( n != 4 ) + break; + if ( m == machineId ) + scores[state][ch] = score; + } + fclose( sfn ); + + /* Init on state list flags. 
*/ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + RedTransList::Iter rtel = st->outRange; + int chi = 0; + while ( rtel.lte() ) { + /* 1. Bring chi up to lower end of out range. */ + while ( chi < rtel->lowKey.getVal() ) { + chi++; + } + + /* 2. While inside lower, add in score. */ + while ( chi <= rtel->highKey.getVal() ) { + rtel->score += scores[st->id][chi]; + chi++; + } + + /* 3. Next range. */ + rtel++; + } + } +} + +/* This second pass will collect any states that didn't make it in the first + * pass. Used for depth-first and breadth-first passes. */ +void RedFsmAp::scoreSecondPass( RedStateAp *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->onListRest ) + return; + + /* Doing depth first, put state on the list. */ + state->onListRest = true; + + if ( !state->onStateList ) { + state->onStateList = true; + stateList.append( state ); + } + + /* At this point transitions should only be in ranges. */ + assert( state->outSingle.length() == 0 ); + assert( state->defTrans == 0 ); + + /* Recurse on everything ranges. */ + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + for ( int c = 0; c < rtel->value->numConds(); c++ ) { + RedCondPair *cond = rtel->value->outCond( c ); + if ( cond->targ != 0 ) + scoreSecondPass( cond->targ ); + } + } + + if ( state->nfaTargs ) { + for ( RedNfaTargs::Iter s = *state->nfaTargs; s.lte(); s++ ) + scoreSecondPass( s->state ); + } +} + +void RedFsmAp::scoreOrderingDepth( RedStateAp *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->onStateList ) + return; + + /* Doing depth first, put state on the list. */ + state->onStateList = true; + stateList.append( state ); + + /* At this point transitions should only be in ranges. */ + assert( state->outSingle.length() == 0 ); + assert( state->defTrans == 0 ); + + /* Recurse on everything ranges. 
*/ + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + if ( rtel->score > 10 ) { + for ( int c = 0; c < rtel->value->numConds(); c++ ) { + RedCondPair *cond = rtel->value->outCond( c ); + if ( cond->targ != 0 ) + scoreOrderingDepth( cond->targ ); + } + } + } +} + +void RedFsmAp::scoreOrderingDepth() +{ + readScores(); + + /* Init on state list flags. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->onStateList = false; + st->onListRest = false; + } + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + scoreOrderingDepth( startState ); + + scoreSecondPass( startState ); + for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) + scoreSecondPass( *en ); + if ( forcedErrorState ) + scoreSecondPass( errState ); + + assert( stateListLen == stateList.length() ); +} + +void RedFsmAp::scoreOrderingBreadth() +{ + readScores(); + + /* Init on state list flags. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->onStateList = false; + st->onListRest = false; + } + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + if ( startState != 0 ) + breadthFirstAdd( startState ); + + int depth = 0; + int nextLevel = stateList.length(); + int pos = 0; + + /* To implement breadth-first we traverse the current list (assuming a + * start state) and add children. */ + RedStateAp *cur = stateList.head; + while ( cur != 0 ) { + /* Recurse on everything ranges. 
*/ + for ( RedTransList::Iter rtel = cur->outRange; rtel.lte(); rtel++ ) { + if ( rtel->score > 100 ) { + for ( int c = 0; c < rtel->value->numConds(); c++ ) { + RedCondPair *cond = rtel->value->outCond( c ); + if ( cond->targ != 0 ) + breadthFirstAdd( cond->targ ); + } + } + } + + cur = cur->next; + pos += 1; + + if ( pos == nextLevel ) { + depth += 1; + nextLevel = stateList.length(); + } + } + + scoreSecondPass( startState ); + for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) + scoreSecondPass( *en ); + if ( forcedErrorState ) + scoreSecondPass( errState ); + + assert( stateListLen == stateList.length() ); +} +#endif + +void RedFsmAp::randomizedOrdering() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->onStateList = false; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + srand( time( 0 ) ); + + for ( int i = nextStateId; i > 0; i-- ) { + /* Pick one from 0 ... i (how many are left). */ + int nth = rand() % i; + + /* Go forward through the list adding the nth. Need to scan because + * there are items already added in the list. */ + for ( int j = 0; j < nextStateId; j++ ) { + if ( !allStates[j].onStateList ) { + if ( nth == 0 ) { + /* Add. */ + allStates[j].onStateList = true; + stateList.append( &allStates[j] ); + break; + } + else { + nth -= 1; + } + } + } + } + assert( stateListLen == stateList.length() ); +} + +/* Assign state ids by appearance in the state list. */ +void RedFsmAp::sequentialStateIds() +{ + /* Table based machines depend on the state numbers starting at zero. */ + nextStateId = 0; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->id = nextStateId++; +} + +/* Stable sort the states by final state status. */ +void RedFsmAp::sortStatesByFinal() +{ + /* Move forward through the list and move final states onto the end. 
*/ + RedStateAp *state = 0; + RedStateAp *next = stateList.head; + RedStateAp *last = stateList.tail; + while ( state != last ) { + /* Move forward and load up the next. */ + state = next; + next = state->next; + + /* Throw to the end? */ + if ( state->isFinal ) { + stateList.detach( state ); + stateList.append( state ); + } + } +} + +/* Assign state ids by final state state status. */ +void RedFsmAp::sortStateIdsByFinal() +{ + /* Table based machines depend on this starting at zero. */ + nextStateId = 0; + + /* First pass to assign non final ids. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( ! st->isFinal ) + st->id = nextStateId++; + } + + /* Second pass to assign final ids. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->isFinal ) + st->id = nextStateId++; + } +} + +struct CmpStateById +{ + static int compare( RedStateAp *st1, RedStateAp *st2 ) + { + if ( st1->id < st2->id ) + return -1; + else if ( st1->id > st2->id ) + return 1; + else + return 0; + } +}; + +void RedFsmAp::sortByStateId() +{ + /* Make the array. */ + int pos = 0; + RedStateAp **ptrList = new RedStateAp*[stateList.length()]; + for ( RedStateList::Iter st = stateList; st.lte(); st++, pos++ ) + ptrList[pos] = st; + + MergeSort<RedStateAp*, CmpStateById> mergeSort; + mergeSort.sort( ptrList, stateList.length() ); + + stateList.abandon(); + for ( int st = 0; st < pos; st++ ) + stateList.append( ptrList[st] ); + + delete[] ptrList; +} + +/* Find the final state with the lowest id. */ +void RedFsmAp::findFirstFinState() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->isFinal && (firstFinState == 0 || st->id < firstFinState->id) ) + firstFinState = st; + } +} + +void RedFsmAp::assignActionLocs() +{ + int nextLocation = 0; + for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) { + /* Store the loc, skip over the array and a null terminator. 
*/ + act->location = nextLocation; + nextLocation += act->key.length() + 1; + } +} + +/* Check if we can extend the current range by displacing any ranges + * ahead to the singles. */ +bool RedFsmAp::canExtend( const RedTransList &list, int pos ) +{ + /* Get the transition that we want to extend. */ + RedTransAp *extendTrans = list[pos].value; + + /* Look ahead in the transition list. */ + for ( int next = pos + 1; next < list.length(); pos++, next++ ) { + /* If they are not continuous then cannot extend. */ + Key nextKey = list[next].lowKey; + keyOps->decrement( nextKey ); + if ( keyOps->ne( list[pos].highKey, nextKey ) ) + break; + + /* Check for the extenstion property. */ + if ( extendTrans == list[next].value ) + return true; + + /* If the span of the next element is more than one, then don't keep + * checking, it won't be moved to single. */ + unsigned long long nextSpan = keyOps->span( list[next].lowKey, list[next].highKey ); + if ( nextSpan > 1 ) + break; + } + return false; +} + +/* Move ranges to the singles list if it means we can extend some ranges, or if + * the spans are of length one. */ +void RedFsmAp::moveSelectTransToSingle( RedStateAp *state ) +{ + RedTransList &range = state->outRange; + RedTransList &single = state->outSingle; + for ( int rpos = 0; rpos < range.length(); ) { + /* Check if this is a range we can extend. */ + if ( canExtend( range, rpos ) ) { + /* Transfer singles over. */ + while ( range[rpos].value != range[rpos+1].value ) { + /* Transfer the range to single. */ + single.append( range[rpos+1] ); + range.remove( rpos+1 ); + } + + /* Extend. */ + range[rpos].highKey = range[rpos+1].highKey; + range.remove( rpos+1 ); + } + /* Maybe move it to the singles. */ + else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) { + single.append( range[rpos] ); + range.remove( rpos ); + } + else { + /* Keeping it in the ranges. 
*/ + rpos += 1; + } + } +} + +void RedFsmAp::moveAllTransToSingle( RedStateAp *state ) +{ + RedTransList &range = state->outRange; + RedTransList &single = state->outSingle; + for ( int rpos = 0; rpos < range.length(); rpos++ ) { + + RedTransEl el = range[rpos]; + unsigned long long span = keyOps->span( el.lowKey, el.highKey ); + + Key key = el.lowKey; + for ( unsigned long long pos = 0; pos < span; pos++ ) { + el.lowKey = el.highKey = key; + single.append( el ); + keyOps->increment( key ); + } + } + range.empty(); +} + +/* Look through ranges and choose suitable single character transitions. */ +void RedFsmAp::moveSelectTransToSingle() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Rewrite the transition list taking out the suitable single + * transtions. */ + moveSelectTransToSingle( st ); + } +} + +void RedFsmAp::moveAllTransToSingle() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Rewrite the transition list taking out the suitable single + * transtions. */ + moveAllTransToSingle( st ); + } +} + +void RedFsmAp::makeFlat() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outRange.length() == 0 ) { + st->lowKey = st->highKey = 0; + st->transList = 0; + } + else { + st->lowKey = st->outRange[0].lowKey; + st->highKey = st->outRange[st->outRange.length()-1].highKey; + unsigned long long span = keyOps->span( st->lowKey, st->highKey ); + st->transList = new RedTransAp*[ span ]; + memset( st->transList, 0, sizeof(RedTransAp*)*span ); + + for ( RedTransList::Iter trans = st->outRange; trans.lte(); trans++ ) { + unsigned long long base, trSpan; + base = keyOps->span( st->lowKey, trans->lowKey )-1; + trSpan = keyOps->span( trans->lowKey, trans->highKey ); + for ( unsigned long long pos = 0; pos < trSpan; pos++ ) + st->transList[base+pos] = trans->value; + } + + /* Fill in the gaps with the default transition. 
*/ + for ( unsigned long long pos = 0; pos < span; pos++ ) { + if ( st->transList[pos] == 0 ) + st->transList[pos] = st->defTrans; + } + } + } +} + +void RedFsmAp::characterClass( EquivList &equiv ) +{ + /* Find the global low and high keys. */ + bool anyTrans = false; + Key lowKey = keyOps->maxKey; + Key highKey = keyOps->minKey; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outRange.length() == 0 ) + continue; + + st->lowKey = st->outRange[0].lowKey; + st->highKey = st->outRange[st->outRange.length()-1].highKey; + + if ( keyOps->lt( st->lowKey, lowKey ) ) + lowKey = st->lowKey; + + if ( keyOps->gt( st->highKey, highKey ) ) + highKey = st->highKey; + + anyTrans = true; + } + + if ( ! anyTrans ) { + this->lowKey = lowKey; + this->highKey = highKey; + this->classMap = 0; + this->nextClass = 1; + return; + } + + long long next = 1; + equiv.append( new EquivClass( lowKey, highKey, next++ ) ); + + /* Start with a single equivalence class and break it up using range + * boundaries of each state. This will tell us what the equivalence class + * ranges are. These are the ranges that always go to the same state, + * across all states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outRange.length() == 0 ) + continue; + + EquivList newList; + PairKeyMap uniqPairs; + + /* What is the set of unique transitions (*for this state) */ + EquivAlloc uniqTrans; + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + if ( ! uniqTrans.find( rtel->value ) ) + uniqTrans.insert( rtel->value, next++ ); + } + + /* Merge with whole-machine equiv classes. */ + for ( RangePairIter< PiList<EquivClass>, PiVector<RedTransEl> > + pair( fsmCtx, equiv, st->outRange ); !pair.end(); pair++ ) + { + switch ( pair.userState ) { + + case RangePairIter<EquivClass>::RangeOverlap: { + /* Look up the char for s2. */ + EquivAllocEl *s2El = uniqTrans.find( pair.s2Tel.trans->value ); + + /* Can't use either equiv classes, find uniques. 
*/ + PairKey pairKey( pair.s1Tel.trans->value, s2El->value ); + PairKeyMapEl *pairEl = uniqPairs.find( pairKey ); + if ( ! pairEl ) + pairEl = uniqPairs.insert( pairKey, next++ ); + + EquivClass *equivClass = new EquivClass( + pair.s1Tel.lowKey, pair.s1Tel.highKey, + pairEl->value ); + newList.append( equivClass ); + break; + } + + case RangePairIter<EquivClass>::RangeInS1: { + EquivClass *equivClass = new EquivClass( + pair.s1Tel.lowKey, pair.s1Tel.highKey, + pair.s1Tel.trans->value ); + newList.append( equivClass ); + break; + } + + case RangePairIter<EquivClass>::RangeInS2: { + /* Look up the char for s2. */ + EquivAllocEl *s2El = uniqTrans.find( pair.s2Tel.trans->value ); + + EquivClass *equivClass = new EquivClass( + pair.s2Tel.lowKey, pair.s2Tel.highKey, + s2El->value ); + newList.append( equivClass ); + break; + } + + case RangePairIter<EquivClass>::BreakS1: + case RangePairIter<EquivClass>::BreakS2: + break; + } + } + + equiv.empty(); + equiv.transfer( newList ); + } + + /* Reduce to sequential. */ + next = 0; + BstMap<long long, long long> map; + for ( EquivClass *c = equiv.head; c != 0; c = c->next ) { + BstMapEl<long long, long long> *el = map.find( c->value ); + if ( ! el ) + el = map.insert( c->value, next++ ); + c->value = el->value; + } + + /* Build the map and emit arrays from the range-based equiv classes. Will + * likely crash if there are no transitions in the FSM. 
*/ + long long maxSpan = keyOps->span( lowKey, highKey ); + long long *dest = new long long[maxSpan]; + memset( dest, 0, sizeof(long long) * maxSpan ); + + for ( EquivClass *c = equiv.head; c != 0; c = c->next ) { + long long base = keyOps->span( lowKey, c->lowKey ) - 1; + long long span = keyOps->span( c->lowKey, c->highKey ); + for ( long long s = 0; s < span; s++ ) + dest[base + s] = c->value; + } + + this->lowKey = lowKey; + this->highKey = highKey; + this->classMap = dest; + this->nextClass = next; + +} + +void RedFsmAp::makeFlatClass() +{ + EquivList equiv; + characterClass( equiv ); + + /* Expand the transitions. This uses the equivalence classes. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outRange.length() == 0 ) { + st->lowKey = st->highKey = 0; + st->low = st->high = 0; + st->transList = 0; + } + else { + st->lowKey = st->outRange[0].lowKey; + st->highKey = st->outRange[st->outRange.length()-1].highKey; + + /* Compute low and high in class space. Use a pair iter to find all + * the clases. Alleviates the need to iterate the whole input + * alphabet. 
*/ + st->low = nextClass; + st->high = -1; + for ( RangePairIter< PiList<EquivClass>, PiVector<RedTransEl> > + pair( fsmCtx, equiv, st->outRange ); !pair.end(); pair++ ) + { + if ( pair.userState == RangePairIter<PiList<EquivClass>, PiVector<RedTransEl> >::RangeOverlap || + pair.userState == RangePairIter<PiList<EquivClass>, PiVector<RedTransEl> >::RangeInS2 ) + { + long long off = keyOps->span( lowKey, pair.s2Tel.lowKey ) - 1; + if ( classMap[off] < st->low ) + st->low = classMap[off]; + if ( classMap[off] > st->high ) + st->high = classMap[off]; + } + } + + long long span = st->high - st->low + 1; + st->transList = new RedTransAp*[ span ]; + memset( st->transList, 0, sizeof(RedTransAp*)*span ); + + for ( RangePairIter< PiList<EquivClass>, PiVector<RedTransEl> > + pair( fsmCtx, equiv, st->outRange ); !pair.end(); pair++ ) + { + if ( pair.userState == RangePairIter< PiList<EquivClass>, PiVector<RedTransEl> >::RangeOverlap || + pair.userState == RangePairIter< PiList<EquivClass>, PiVector<RedTransEl> >::RangeInS2 ) + { + long long off = keyOps->span( lowKey, pair.s2Tel.lowKey ) - 1; + st->transList[ classMap[off] - st->low ] = pair.s2Tel.trans->value; + } + } + + /* Fill in the gaps with the default transition. */ + for ( long long pos = 0; pos < span; pos++ ) { + if ( st->transList[pos] == 0 ) + st->transList[pos] = st->defTrans; + } + } + } + + equiv.empty(); +} + + +/* A default transition has been picked, move it from the outRange to the + * default pointer. */ +void RedFsmAp::moveToDefault( RedTransAp *defTrans, RedStateAp *state ) +{ + /* Rewrite the outRange, omitting any ranges that use + * the picked default. */ + RedTransList outRange; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* If it does not take the default, copy it over. */ + if ( rtel->value != defTrans ) + outRange.append( *rtel ); + } + + /* Save off the range we just created into the state's range. 
*/ + state->outRange.transfer( outRange ); + + /* Store the default. */ + state->defTrans = defTrans; +} + +bool RedFsmAp::alphabetCovered( RedTransList &outRange ) +{ + /* Cannot cover without any out ranges. */ + if ( outRange.length() == 0 ) + return false; + + /* If the first range doesn't start at the the lower bound then the + * alphabet is not covered. */ + RedTransList::Iter rtel = outRange; + if ( keyOps->lt( keyOps->minKey, rtel->lowKey ) ) + return false; + + /* Check that every range is next to the previous one. */ + rtel.increment(); + for ( ; rtel.lte(); rtel++ ) { + Key highKey = rtel[-1].highKey; + keyOps->increment( highKey ); + if ( keyOps->ne( highKey, rtel->lowKey ) ) + return false; + } + + /* The last must extend to the upper bound. */ + RedTransEl *last = &outRange[outRange.length()-1]; + if ( keyOps->lt( last->highKey, keyOps->maxKey ) ) + return false; + + return true; +} + +RedTransAp *RedFsmAp::chooseDefaultSpan( RedStateAp *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) + stateTransSet.insert( rtel->value ); + + /* For each transition in the find how many alphabet characters the + * transition spans. */ + unsigned long long *span = new unsigned long long[stateTransSet.length()]; + memset( span, 0, sizeof(unsigned long long) * stateTransSet.length() ); + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* Lookup the transition in the set. */ + RedTransAp **inSet = stateTransSet.find( rtel->value ); + int pos = inSet - stateTransSet.data; + span[pos] += keyOps->span( rtel->lowKey, rtel->highKey ); + } + + /* Find the max span, choose it for making the default. 
*/ + RedTransAp *maxTrans = 0; + unsigned long long maxSpan = 0; + for ( RedTransSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { + if ( span[rtel.pos()] > maxSpan ) { + maxSpan = span[rtel.pos()]; + maxTrans = *rtel; + } + } + + delete[] span; + return maxTrans; +} + +/* Pick default transitions from ranges for the states. */ +void RedFsmAp::chooseDefaultSpan() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Only pick a default transition if the alphabet is covered. This + * avoids any transitions in the out range that go to error and avoids + * the need for an ERR state. */ + if ( alphabetCovered( st->outRange ) ) { + /* Pick a default transition by largest span. */ + RedTransAp *defTrans = chooseDefaultSpan( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } + } +} + +RedTransAp *RedFsmAp::chooseDefaultGoto( RedStateAp *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + for ( int c = 0; c < rtel->value->numConds(); c++ ) { + RedCondPair *cond = rtel->value->outCond(c); + if ( cond->targ == state->next ) + return rtel->value; + } + } + return 0; +} + +void RedFsmAp::chooseDefaultGoto() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Pick a default transition. */ + RedTransAp *defTrans = chooseDefaultGoto( st ); + if ( defTrans == 0 ) + defTrans = chooseDefaultSpan( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } +} + +RedTransAp *RedFsmAp::chooseDefaultNumRanges( RedStateAp *state ) +{ + /* Make a set of transitions from the outRange. 
*/ + RedTransSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) + stateTransSet.insert( rtel->value ); + + /* For each transition in the find how many ranges use the transition. */ + int *numRanges = new int[stateTransSet.length()]; + memset( numRanges, 0, sizeof(int) * stateTransSet.length() ); + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* Lookup the transition in the set. */ + RedTransAp **inSet = stateTransSet.find( rtel->value ); + numRanges[inSet - stateTransSet.data] += 1; + } + + /* Find the max number of ranges. */ + RedTransAp *maxTrans = 0; + int maxNumRanges = 0; + for ( RedTransSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { + if ( numRanges[rtel.pos()] > maxNumRanges ) { + maxNumRanges = numRanges[rtel.pos()]; + maxTrans = *rtel; + } + } + + delete[] numRanges; + return maxTrans; +} + +void RedFsmAp::chooseDefaultNumRanges() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Pick a default transition. */ + RedTransAp *defTrans = chooseDefaultNumRanges( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } +} + +RedCondAp *RedFsmAp::getErrorCond() +{ + return allocateCond( getErrorState(), 0 ); +} + +RedTransAp *RedFsmAp::getErrorTrans() +{ + return allocateTrans( getErrorState(), 0 ); +} + +RedStateAp *RedFsmAp::getErrorState() +{ + /* Something went wrong. An error state is needed but one was not supplied + * by the frontend. */ + assert( errState != 0 ); + return errState; +} + +/* Makes a plain transition. */ +RedTransAp *RedFsmAp::allocateTrans( RedStateAp *targ, RedAction *action ) +{ + /* Create a reduced trans and look for it in the transiton set. 
*/ + RedTransAp redTrans( 0, 0, targ, action ); + RedTransAp *inDict = transSet.find( &redTrans ); + if ( inDict == 0 ) { + inDict = new RedTransAp( nextTransId++, nextCondId++, targ, action ); + transSet.insert( inDict ); + } + return inDict; +} + +/* Makes a cond list transition. */ +RedTransAp *RedFsmAp::allocateTrans( GenCondSpace *condSpace, + RedCondEl *outConds, int numConds, RedCondAp *errCond ) +{ + /* Create a reduced trans and look for it in the transiton set. */ + RedTransAp redTrans( 0, condSpace, outConds, numConds, errCond ); + RedTransAp *inDict = transSet.find( &redTrans ); + if ( inDict == 0 ) { + inDict = new RedTransAp( nextTransId++, condSpace, outConds, numConds, errCond ); + transSet.insert( inDict ); + } + else { + /* Need to free the out cond vector. */ + delete[] outConds; + } + return inDict; +} + +RedCondAp *RedFsmAp::allocateCond( RedStateAp *targ, RedAction *action ) +{ + /* Create a reduced trans and look for it in the transiton set. */ + RedCondAp redCond( targ, action, 0 ); + RedCondAp *inDict = condSet.find( &redCond ); + if ( inDict == 0 ) { + inDict = new RedCondAp( targ, action, nextCondId++ ); + condSet.insert( inDict ); + } + return inDict; +} + +void RedFsmAp::partitionFsm( int nparts ) +{ + /* At this point the states are ordered by a depth-first traversal. We + * will allocate to partitions based on this ordering. */ + this->nParts = nparts; + int partSize = stateList.length() / nparts; + int remainder = stateList.length() % nparts; + int numInPart = partSize; + int partition = 0; + if ( remainder-- > 0 ) + numInPart += 1; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->partition = partition; + + numInPart -= 1; + if ( numInPart == 0 ) { + partition += 1; + numInPart = partSize; + if ( remainder-- > 0 ) + numInPart += 1; + } + } +} + +void RedFsmAp::setInTrans() +{ + /* First pass counts the number of transitions. 
*/ + for ( CondApSet::Iter trans = condSet; trans.lte(); trans++ ) + trans->p.targ->numInConds += 1; + + for ( TransApSet::Iter trans = transSet; trans.lte(); trans++ ) { + if ( trans->condSpace == 0 ) + trans->p.targ->numInConds += 1; + else { + /* We have a placement choice here, but associate it with the + * first. */ + RedCondPair *pair = trans->outCond( 0 ); + pair->targ->numInCondTests += 1; + } + } + + /* Allocate. Reset the counts so we can use them as the current size. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->inConds = new RedCondPair*[st->numInConds]; + st->numInConds = 0; + + st->inCondTests = new RedTransAp*[st->numInCondTests]; + st->numInCondTests = 0; + } + + /* Fill the arrays. */ + for ( CondApSet::Iter trans = condSet; trans.lte(); trans++ ) { + RedStateAp *targ = trans->p.targ; + targ->inConds[targ->numInConds++] = &trans->p; + } + + for ( TransApSet::Iter trans = transSet; trans.lte(); trans++ ) { + if ( trans->condSpace == 0 ) { + RedStateAp *targ = trans->p.targ; + targ->inConds[targ->numInConds++] = &trans->p; + } + else { + RedCondPair *pair = trans->outCond( 0 ); + RedStateAp *targ = pair->targ; + targ->inCondTests[targ->numInCondTests++] = trans; + } + } +} diff --git a/ragel/redfsm.h b/ragel/redfsm.h new file mode 100644 index 00000000..392b1a9c --- /dev/null +++ b/ragel/redfsm.h @@ -0,0 +1,889 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies 
or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _REDFSM_H +#define _REDFSM_H + +#include <assert.h> +#include <string.h> +#include <string> +#include "config.h" +#include "common.h" +#include "vector.h" +#include "dlist.h" +#include "compare.h" +#include "bstmap.h" +#include "bstset.h" +#include "avlmap.h" +#include "avltree.h" +#include "avlbasic.h" +#include "mergesort.h" +#include "sbstmap.h" +#include "sbstset.h" +#include "sbsttable.h" + +#define TRANS_ERR_TRANS 0 +#define STATE_ERR_STATE 0 +#define FUNC_NO_FUNC 0 + +// #define SCORE_ORDERING 1 + +using std::string; + +struct RedStateAp; +struct GenInlineList; +struct GenAction; +struct FsmCtx; +struct GenCondSpace; +typedef BstSet<int> RedCondKeySet; + +/* + * Inline code tree + */ +struct GenInlineItem +{ + enum Type + { + Text, Goto, Call, Ncall, Next, GotoExpr, CallExpr, + NcallExpr, NextExpr, Ret, Nret, + PChar, Char, Hold, Curs, Targs, Entry, Exec, Break, Nbreak, + LmSwitch, LmExec, LmSetActId, LmSetTokEnd, LmGetTokEnd, + LmInitAct, LmInitTokStart, LmSetTokStart, NfaClear, + HostStmt, HostExpr, HostText, + GenStmt, GenExpr, LmCase, LmHold, + NfaWrapAction, NfaWrapConds + }; + + GenInlineItem( const InputLoc &loc, Type type ) : + loc(loc), targId(0), targState(0), + lmId(0), children(0), offset(0), + wrappedAction(0), type(type) { } + + ~GenInlineItem(); + + InputLoc loc; + std::string data; + int targId; + RedStateAp *targState; + int lmId; + GenInlineList *children; + int offset; + GenAction *wrappedAction; + 
GenCondSpace *condSpace; + RedCondKeySet condKeySet; + Type type; + + GenInlineItem *prev, *next; +}; + +/* Normally this would be a typedef, but that would entail including DList from + * ptreetypes, which should be just typedef forwards. */ +struct GenInlineList : public DList<GenInlineItem> { }; + +struct GenInlineExpr +{ + GenInlineExpr( const InputLoc &loc, GenInlineList *inlineList ) + : loc(loc), inlineList( inlineList ) {} + + ~GenInlineExpr() + { + if ( inlineList != 0 ) { + inlineList->empty(); + delete inlineList; + } + } + + InputLoc loc; + GenInlineList *inlineList; +}; + +/* Element in list of actions. Contains the string for the code to execute. */ +struct GenAction +: + public DListEl<GenAction> +{ + GenAction( ) + : + inlineList(0), + actionId(0), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + numNfaPushRefs(0), + numNfaRestoreRefs(0), + numNfaPopActionRefs(0), + numNfaPopTestRefs(0) + { + } + + ~GenAction() + { + if ( inlineList != 0 ) { + inlineList->empty(); + delete inlineList; + } + } + + /* Data collected during parse. */ + InputLoc loc; + std::string name; + GenInlineList *inlineList; + int actionId; + + string nameOrLoc(); + + /* Number of references in the final machine. */ + int numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + int numNfaPushRefs; + int numNfaRestoreRefs; + int numNfaPopActionRefs; + int numNfaPopTestRefs; +}; + + +/* Forwards. */ +struct RedStateAp; +struct StateAp; + +/* Transition GenAction Element. */ +typedef SBstMapEl< int, GenAction* > GenActionTableEl; + +/* Transition GenAction Table. 
*/ +struct GenActionTable + : public SBstMap< int, GenAction*, CmpOrd<int> > +{ + void setAction( int ordering, GenAction *action ); + void setActions( int *orderings, GenAction **actions, int nActs ); + void setActions( const GenActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). */ +struct CmpGenActionTableEl +{ + static int compare( const GenActionTableEl &action1, + const GenActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for GenActionTable. */ +typedef CmpSTable< GenActionTableEl, CmpGenActionTableEl > CmpGenActionTable; + +/* Set of states. */ +typedef BstSet<RedStateAp*> RedStateSet; +typedef BstSet<int> IntSet; + +/* Reduced action. */ +struct RedAction +: + public AvlTreeEl<RedAction> +{ + RedAction( ) + : + key(), + eofRefs(0), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + numNfaPushRefs(0), + numNfaRestoreRefs(0), + numNfaPopActionRefs(0), + numNfaPopTestRefs(0), + bAnyNextStmt(false), + bAnyCurStateRef(false), + bAnyBreakStmt(false), + bUsingAct(false) + { } + + const GenActionTable &getKey() + { return key; } + + GenActionTable key; + int actListId; + int location; + IntSet *eofRefs; + + /* Number of references in the final machine. 
*/ + int numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + int numNfaPushRefs; + int numNfaRestoreRefs; + int numNfaPopActionRefs; + int numNfaPopTestRefs; + + bool anyNextStmt() { return bAnyNextStmt; } + bool anyCurStateRef() { return bAnyCurStateRef; } + bool anyBreakStmt() { return bAnyBreakStmt; } + bool usingAct() { return bUsingAct; } + + bool bAnyNextStmt; + bool bAnyCurStateRef; + bool bAnyBreakStmt; + bool bUsingAct; +}; + +typedef AvlTree<RedAction, GenActionTable, CmpGenActionTable> GenActionTableMap; + +struct RedCondPair +{ + int id; + RedStateAp *targ; + RedAction *action; +}; + +struct RedCondAp +: + public AvlTreeEl<RedCondAp> +{ + RedCondAp( RedStateAp *targ, RedAction *action, int id ) + { + p.id = id; + p.targ = targ; + p.action = action; + } + + RedCondPair p; +}; + +struct RedCondEl +{ + CondKey key; + RedCondAp *value; +}; + +struct CmpRedCondEl +{ + static int compare( const RedCondEl &el1, const RedCondEl &el2 ) + { + if ( el1.key < el2.key ) + return -1; + else if ( el1.key > el2.key ) + return 1; + else if ( el1.value < el2.value ) + return -1; + else if ( el1.value > el2.value ) + return 1; + else + return 0; + } +}; + +typedef Vector< GenAction* > GenCondSet; + +struct GenCondSpace +{ + GenCondSpace() + : + numTransRefs(0), + numNfaRefs(0) + {} + + Key baseKey; + GenCondSet condSet; + int condSpaceId; + + long fullSize() + { return ( 1 << condSet.length() ); } + + long numTransRefs; + long numNfaRefs; + + GenCondSpace *next, *prev; +}; + +typedef DList<GenCondSpace> CondSpaceList; + +struct RedCondVect +{ + int numConds; + RedCondEl *outConds; + RedCondAp *errCond; +}; + +/* Reduced transition. 
*/ +struct RedTransAp +: + public AvlTreeEl<RedTransAp> +{ + RedTransAp( int id, GenCondSpace *condSpace, + RedCondEl *outConds, int numConds, RedCondAp *errCond ) + : + id(id), + condSpace(condSpace) + { + v.outConds = outConds; + v.numConds = numConds; + v.errCond = errCond; + } + + RedTransAp( int id, int condId, RedStateAp *targ, RedAction *action ) + : + id(id), + condSpace(0) + { + p.id = condId; + p.targ = targ; + p.action = action; + } + + long condFullSize() + { + return condSpace == 0 ? 1 : condSpace->fullSize(); + } + + CondKey outCondKey( int off ) + { + return condSpace == 0 ? CondKey(0) : v.outConds[off].key; + } + + RedCondPair *outCond( int off ) + { + return condSpace == 0 ? &p : &v.outConds[off].value->p; + } + + int numConds() + { + return condSpace == 0 ? 1 : v.numConds; + } + + RedCondPair *errCond() + { + return condSpace == 0 ? 0 : ( v.errCond != 0 ? &v.errCond->p : 0 ); + } + + int id; + GenCondSpace *condSpace; + + /* Either a pair or a vector of conds. */ + union + { + RedCondPair p; + RedCondVect v; + }; +}; + +/* Compare of transitions for the final reduction of transitions. Comparison + * is on target and the pointer to the shared action table. It is assumed that + * when this is used the action tables have been reduced. 
*/ +struct CmpRedTransAp +{ + static int compare( const RedTransAp &t1, const RedTransAp &t2 ) + { + if ( t1.condSpace < t2.condSpace ) + return -1; + else if ( t1.condSpace > t2.condSpace ) + return 1; + else { + if ( t1.condSpace == 0 ) { + if ( t1.p.targ < t2.p.targ ) + return -1; + else if ( t1.p.targ > t2.p.targ ) + return 1; + else if ( t1.p.action < t2.p.action ) + return -1; + else if ( t1.p.action > t2.p.action ) + return 1; + else + return 0; + + } + else { + if ( t1.v.numConds < t2.v.numConds ) + return -1; + else if ( t1.v.numConds > t2.v.numConds ) + return 1; + else + { + RedCondEl *i1 = t1.v.outConds, *i2 = t2.v.outConds; + long len = t1.v.numConds, cmpResult; + for ( long pos = 0; pos < len; + pos += 1, i1 += 1, i2 += 1 ) + { + cmpResult = CmpRedCondEl::compare(*i1, *i2); + if ( cmpResult != 0 ) + return cmpResult; + } + return 0; + } + } + } + } +}; + +struct CmpRedCondAp +{ + static int compare( const RedCondAp &t1, const RedCondAp &t2 ) + { + if ( t1.p.targ < t2.p.targ ) + return -1; + else if ( t1.p.targ > t2.p.targ ) + return 1; + else if ( t1.p.action < t2.p.action ) + return -1; + else if ( t1.p.action > t2.p.action ) + return 1; + else + return 0; + } +}; + +typedef AvlBasic<RedTransAp, CmpRedTransAp> TransApSet; +typedef AvlBasic<RedCondAp, CmpRedCondAp> CondApSet; + +/* Element in out range. */ +struct RedTransEl +{ + /* Constructors. */ + RedTransEl( Key lowKey, Key highKey, RedTransAp *value ) + : + lowKey(lowKey), + highKey(highKey), + value(value) +#ifdef SCORE_ORDERING + , score(0) +#endif + { } + + Key lowKey, highKey; + RedTransAp *value; +#ifdef SCORE_ORDERING + long long score; +#endif +}; + +typedef Vector<RedTransEl> RedTransList; +typedef Vector<RedStateAp*> RedStateVect; + +typedef BstMapEl<RedStateAp*, unsigned long long> RedSpanMapEl; +typedef BstMap<RedStateAp*, unsigned long long> RedSpanMap; + +/* Compare used by span map sort. Reverse sorts by the span. 
*/ +struct CmpRedSpanMapEl +{ + static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 ) + { + if ( smel1.value > smel2.value ) + return -1; + else if ( smel1.value < smel2.value ) + return 1; + else + return 0; + } +}; + +/* Sorting state-span map entries by span. */ +typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort; + +/* Set of entry ids that go into this state. */ +typedef Vector<int> EntryIdVect; +typedef Vector<char*> EntryNameVect; + +struct Condition +{ + Condition( ) + : key(0), baseKey(0) {} + + Key key; + Key baseKey; + GenCondSet condSet; + + Condition *next, *prev; +}; +typedef DList<Condition> ConditionList; + +struct GenStateCond +{ + Key lowKey; + Key highKey; + + GenCondSpace *condSpace; + + GenStateCond *prev, *next; +}; +typedef DList<GenStateCond> GenStateCondList; +typedef Vector<GenStateCond*> StateCondVect; + +struct RedNfaTarg +{ + RedNfaTarg( RedStateAp *state, RedAction *push, + RedAction *popTest, int order ) + : + id(0), + state(state), + push(push), + popTest(popTest), + order(order) + {} + + long id; + RedStateAp *state; + RedAction *push; + RedAction *popTest; + int order; +}; + +struct RedNfaTargCmp +{ + static inline long compare( const RedNfaTarg &k1, const RedNfaTarg &k2 ) + { + if ( k1.order < k2.order ) + return -1; + else if ( k1.order > k2.order ) + return 1; + return 0; + } +}; + +typedef Vector<RedNfaTarg> RedNfaTargs; + +/* Reduced state. */ +struct RedStateAp +{ + RedStateAp() + : + defTrans(0), + transList(0), + isFinal(false), + labelNeeded(false), + outNeeded(false), + onStateList(false), + onListRest(false), + toStateAction(0), + fromStateAction(0), + eofAction(0), + eofTrans(0), + id(0), + bAnyRegCurStateRef(false), + partitionBoundary(false), + inConds(0), + numInConds(0), + inCondTests(0), + numInCondTests(0), + nfaTargs(0), + outCondSpace(0) + { } + + /* Transitions out. */ + RedTransList outSingle; + RedTransList outRange; + RedTransAp *defTrans; + + /* For flat keys. 
*/ + Key lowKey, highKey; + RedTransAp **transList; + long long low, high; + + /* The list of states that transitions from this state go to. */ + RedStateVect targStates; + + bool isFinal; + bool labelNeeded; + bool outNeeded; + bool onStateList; + bool onListRest; + RedAction *toStateAction; + RedAction *fromStateAction; + RedAction *eofAction; + RedTransAp *eofTrans; + int id; + + /* Pointers for the list of states. */ + RedStateAp *prev, *next; + + bool anyRegCurStateRef() { return bAnyRegCurStateRef; } + bool bAnyRegCurStateRef; + + int partition; + bool partitionBoundary; + + RedCondPair **inConds; + int numInConds; + + RedTransAp **inCondTests; + int numInCondTests; + + RedNfaTargs *nfaTargs; + GenCondSpace *outCondSpace; + RedCondKeySet outCondKeys; +}; + +/* List of states. */ +typedef DList<RedStateAp> RedStateList; + +/* Set of reduced transitions. Comparison is by pointer. */ +typedef BstSet< RedTransAp*, CmpOrd<RedTransAp*> > RedTransSet; + +/* Next version of the fsm machine. */ +struct RedFsmAp +{ + RedFsmAp( FsmCtx *fsmCtx, int machineId ); + ~RedFsmAp(); + + KeyOps *keyOps; + FsmCtx *fsmCtx; + int machineId; + + bool forcedErrorState; + + int nextActionId; + int nextTransId; + int nextCondId; + + /* Next State Id doubles as the total number of state ids. 
*/ + int nextStateId; + + TransApSet transSet; + CondApSet condSet; + GenActionTableMap actionMap; + RedStateList stateList; + RedStateSet entryPoints; + RedStateAp *startState; + RedStateAp *errState; + RedTransAp *errTrans; + RedCondAp *errCond; + RedTransAp *errActionTrans; + RedStateAp *firstFinState; + RedStateAp *allStates; + int numFinStates; + int nParts; + + bool bAnyToStateActions; + bool bAnyFromStateActions; + bool bAnyRegActions; + bool bAnyEofActions; + bool bAnyEofTrans; + bool bAnyEofActivity; + bool bAnyActionGotos; + bool bAnyActionCalls; + bool bAnyActionNcalls; + bool bAnyActionRets; + bool bAnyActionNrets; + bool bAnyActionByValControl; + bool bAnyRegActionRets; + bool bAnyRegActionByValControl; + bool bAnyRegNextStmt; + bool bAnyRegCurStateRef; + bool bAnyRegBreak; + bool bAnyRegNbreak; + bool bUsingAct; + bool bAnyNfaStates; + bool bAnyNfaPushPops; + bool bAnyNfaPushes; + bool bAnyNfaPops; + bool bAnyTransCondRefs; + bool bAnyNfaCondRefs; + + int maxState; + int maxSingleLen; + int maxRangeLen; + int maxKeyOffset; + int maxIndexOffset; + int maxIndex; + int maxActListId; + int maxActionLoc; + int maxActArrItem; + unsigned long long maxSpan; + int maxFlatIndexOffset; + Key maxKey; + int maxCondSpaceId; + int maxCond; + + bool anyActions(); + bool anyToStateActions() { return bAnyToStateActions; } + bool anyFromStateActions() { return bAnyFromStateActions; } + bool anyRegActions() { return bAnyRegActions; } + bool anyEofActions() { return bAnyEofActions; } + bool anyEofTrans() { return bAnyEofTrans; } + bool anyEofActivity() { return bAnyEofActivity; } + bool anyActionGotos() { return bAnyActionGotos; } + bool anyActionCalls() { return bAnyActionCalls; } + bool anyActionNcalls() { return bAnyActionNcalls; } + bool anyActionRets() { return bAnyActionRets; } + bool anyActionNrets() { return bAnyActionNrets; } + bool anyActionByValControl() { return bAnyActionByValControl; } + bool anyRegActionRets() { return bAnyRegActionRets; } + bool 
anyRegActionByValControl() { return bAnyRegActionByValControl; } + bool anyRegNextStmt() { return bAnyRegNextStmt; } + bool anyRegCurStateRef() { return bAnyRegCurStateRef; } + bool anyRegBreak() { return bAnyRegBreak; } + bool usingAct() { return bUsingAct; } + bool anyRegNbreak() { return bAnyRegNbreak; } + bool anyNfaStates() { return bAnyNfaStates; } + + /* Is it possible to extend a range by bumping ranges that span only + * one character to the singles array. */ + bool canExtend( const RedTransList &list, int pos ); + + /* Pick single transitions from the ranges. */ + void moveSelectTransToSingle( RedStateAp *state ); + void moveAllTransToSingle( RedStateAp *state ); + + void moveSelectTransToSingle(); + void moveAllTransToSingle(); + + void makeFlat(); + + /* State low/high, in key space and class space. */ + Key lowKey; + Key highKey; + long long nextClass; + long long *classMap; + + /* Support structs for equivalence class computation. */ + struct EquivClass + { + EquivClass( Key lowKey, Key highKey, long long value ) + : lowKey(lowKey), highKey(highKey), value(value) {} + + Key lowKey, highKey; + long long value; + EquivClass *prev, *next; + }; + + typedef DList<EquivClass> EquivList; + typedef BstMap<RedTransAp*, int> EquivAlloc; + typedef BstMapEl<RedTransAp*, int> EquivAllocEl; + + struct PairKey + { + PairKey( long long k1, long long k2 ) + : k1(k1), k2(k2) {} + + long long k1; + long long k2; + }; + + struct PairKeyCmp + { + static inline long compare( const PairKey &k1, const PairKey &k2 ) + { + if ( k1.k1 < k2.k1 ) + return -1; + else if ( k1.k1 > k2.k1 ) + return 1; + if ( k1.k2 < k2.k2 ) + return -1; + else if ( k1.k2 > k2.k2 ) + return 1; + else + return 0; + } + }; + + typedef BstMap< PairKey, long long, PairKeyCmp > PairKeyMap; + typedef BstMapEl< PairKey, long long > PairKeyMapEl; + + void characterClass( EquivList &equiv ); + void makeFlatClass(); + + /* Move a selected transition from ranges to default. 
*/ + void moveToDefault( RedTransAp *defTrans, RedStateAp *state ); + + /* Pick a default transition by largest span. */ + RedTransAp *chooseDefaultSpan( RedStateAp *state ); + void chooseDefaultSpan(); + + /* Pick a default transition by most number of ranges. */ + RedTransAp *chooseDefaultNumRanges( RedStateAp *state ); + void chooseDefaultNumRanges(); + + /* Pick a default transition tailored towards goto driven machine. */ + RedTransAp *chooseDefaultGoto( RedStateAp *state ); + void chooseDefaultGoto(); + + /* Ordering states by transition connections. */ + void optimizeStateOrdering( RedStateAp *state ); + void optimizeStateOrdering(); + + /* Ordering states by transition connections. */ + void depthFirstOrdering( RedStateAp *state ); + void depthFirstOrdering(); + + void breadthFirstAdd( RedStateAp *state ); + void breadthFirstOrdering(); + + void randomizedOrdering(); + +#ifdef SCORE_ORDERING + long **scores; + void scoreSecondPass( RedStateAp *state ); + void scoreOrderingBreadth(); + void readScores(); + void scoreOrderingDepth( RedStateAp *state ); + void scoreOrderingDepth(); +#endif + + /* Set state ids. */ + void sequentialStateIds(); + void sortStateIdsByFinal(); + + /* Arrange states in by final id. This is a stable sort. */ + void sortStatesByFinal(); + + /* Sorting states by id. */ + void sortByStateId(); + + /* Locating the first final state. This is the final state with the lowest + * id. */ + void findFirstFinState(); + + void assignActionLocs(); + + RedCondAp *getErrorCond(); + RedTransAp *getErrorTrans(); + RedStateAp *getErrorState(); + + /* Is every char in the alphabet covered? 
*/ + bool alphabetCovered( RedTransList &outRange ); + + RedTransAp *allocateTrans( RedStateAp *targ, RedAction *action ); + RedTransAp *allocateTrans( GenCondSpace *condSpace, + RedCondEl *outConds, int numConds, RedCondAp *errCond ); + + RedCondAp *allocateCond( RedStateAp *targState, RedAction *actionTable ); + + void partitionFsm( int nParts ); + + void setInTrans(); +}; + +#endif diff --git a/ragel/reducer.cc b/ragel/reducer.cc new file mode 100644 index 00000000..592dcfe1 --- /dev/null +++ b/ragel/reducer.cc @@ -0,0 +1,230 @@ +/* + * Copyright 2015-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "reducer.h" + +#include <colm/colm.h> +#include <colm/tree.h> + +#include <errno.h> + +using std::endl; +using std::ifstream; + +void TopLevel::loadMachineName( string data ) +{ + /* Make/get the priority key. The name may have already been referenced + * and therefore exist. 
*/ + PriorDictEl *priorDictEl; + if ( pd->priorDict.insert( data, pd->fsmCtx->nextPriorKey, &priorDictEl ) ) + pd->fsmCtx->nextPriorKey += 1; + pd->curDefPriorKey = priorDictEl->value; + + /* Make/get the local error key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + pd->curDefLocalErrKey = localErrDictEl->value; +} + +void TopLevel::tryMachineDef( const InputLoc &loc, std::string name, + MachineDef *machineDef, bool isInstance ) +{ + GraphDictEl *newEl = pd->graphDict.insert( name ); + if ( newEl != 0 ) { + /* New element in the dict, all good. */ + newEl->value = new VarDef( name, machineDef ); + newEl->isInstance = isInstance; + newEl->loc = loc; + newEl->value->isExport = exportContext[exportContext.length()-1]; + + /* If it is an instance, put on the instance list. */ + if ( isInstance ) + pd->instanceList.append( newEl ); + } + else { + // Recover by ignoring the duplicate. + pd->id->error(loc) << "fsm \"" << name << "\" previously defined" << endl; + } +} + +long TopLevel::tryLongScan( const InputLoc &loc, const char *data ) +{ + /* Convert the priority number to a long. Check for overflow. */ + long priorityNum; + errno = 0; + + long aug = strtol( data, 0, 10 ); + if ( errno == ERANGE && aug == LONG_MAX ) { + /* Priority number too large. Recover by setting the priority to 0. */ + pd->id->error(loc) << "priority number " << data << + " overflows" << endl; + priorityNum = 0; + } + else if ( errno == ERANGE && aug == LONG_MIN ) { + /* Priority number too large in the neg. Recover by using 0. */ + pd->id->error(loc) << "priority number " << data << + " underflows" << endl; + priorityNum = 0; + } + else { + /* No overflow or underflow. */ + priorityNum = aug; + } + + return priorityNum; +} + +void TopLevel::include( const InputLoc &incLoc, bool fileSpecified, string fileName, string machine ) +{ + /* Stash the current section name and pd. 
*/ + string sectionName = pd->sectionName; + ParseData *pd0 = pd; + + const char **includeChecks = 0; + long found = 0; + + const char *inclSectionName = machine.c_str(); + + /* Default the section name to the current section name. */ + if ( inclSectionName == 0 ) + inclSectionName = sectionName.c_str(); + + /* Build the include checks. */ + if ( fileSpecified ) + includeChecks = pd->id->makeIncludePathChecks( curFileName, fileName.c_str() ); + else { + char *test = new char[strlen(curFileName)+1]; + strcpy( test, curFileName ); + + includeChecks = new const char*[2]; + + includeChecks[0] = test; + includeChecks[1] = 0; + } + + /* Try to find the file. */ + ifstream *inFile = pd->id->tryOpenInclude( includeChecks, found ); + if ( inFile == 0 ) { + id->error(incLoc) << "include: failed to locate file" << endl; + const char **tried = includeChecks; + while ( *tried != 0 ) + id->error(incLoc) << "include: attempted: \"" << *tried++ << '\"' << endl; + + return; + } + + delete inFile; + +// /* Don't include anything that's already been included. */ +// if ( !pd->duplicateInclude( includeChecks[found], inclSectionName ) ) { +// pd->includeHistory.push_back( IncludeHistoryItem( +// includeChecks[found], inclSectionName ) ); +// +// /* Either we are not in the lib, or a file was specifed, use the +// * file-based include pass. */ +// includePass.reduceFile( includeChecks[found], id->hostLang ); +// } + + const char *targetMachine0 = targetMachine; + const char *searchMachine0 = searchMachine; + + includeDepth += 1; + pd = 0; + + targetMachine = sectionName.c_str(); + searchMachine = machine.c_str(); + + // reduceFile( includeChecks[found] ); + +// if ( includePass.incItems.length() == 0 ) { +// pd->id->error(incLoc) << "could not find machine " << machine << +// " in " << fileName << endp; +// } +// else { +// /* Load the data into include el. Save in the dict. 
*/ +// loadIncludeData( el, includePass, includeChecks[found] ); +// id->includeDict.insert( el ); +// includePass.incItems.empty(); +// } + + pd = pd0; + includeDepth -= 1; + + targetMachine = targetMachine0; + searchMachine = searchMachine0; +} + +void TopLevel::import( const InputLoc &loc, std::string name, Literal *literal ) +{ + MachineDef *machineDef = new MachineDef( + new Join( + new Expression( + new Term( + new FactorWithAug( + new FactorWithRep( + new FactorWithNeg( new Factor( literal ) ) + ) + ) + ) + ) + ) + ); + + /* Generic creation of machine for instantiation and assignment. */ + tryMachineDef( loc, name, machineDef, false ); + machineDef->join->loc = loc; +} + +void TopLevel::reduceFile( const char *cmd, const char *inputFileName ) +{ + const int baseN = 2; + const char **argv = new const char*[baseN + id->includePaths.length() + 1]; + argv[0] = cmd; + argv[1] = inputFileName; + for ( int i = 0; i < id->includePaths.length(); i++ ) + argv[baseN + i] = id->includePaths.data[i]; + argv[baseN + id->includePaths.length()] = 0; + + const char *prevCurFileName = curFileName; + curFileName = inputFileName; + + colm_program *program = colm_new_program( frontendSections ); + colm_set_debug( program, 0 ); + colm_set_reduce_clean( program, 0 ); + colm_set_reduce_ctx( program, this ); + colm_run_program( program, baseN + id->includePaths.length(), argv ); + id->streamFileNames.append( colm_extract_fns( program ) ); + + int length = 0; + const char *err = colm_error( program, &length ); + if ( err != 0 ) { + // std::cout << "error" << std::endl; + id->error_plain() << string( err, length ) << std::endl; + } + + colm_delete_program( program ); + + curFileName = prevCurFileName; + + delete[] argv; +} diff --git a/ragel/reducer.h b/ragel/reducer.h new file mode 100644 index 00000000..0d0f1af1 --- /dev/null +++ b/ragel/reducer.h @@ -0,0 +1,120 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, 
to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <colm/pdarun.h> +#include <colm/bytecode.h> +#include <colm/defs.h> +#include <colm/input.h> +#include <colm/tree.h> +#include <colm/program.h> +#include <colm/colm.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include <iostream> +#include <vector> +#include <string> + +#include "vector.h" +#include "inputdata.h" +#include "parsedata.h" + +#ifndef _REDUCER_H +#define _REDUCER_H + +char *unescape( const char *s, int slen ); +char *unescape( const char *s ); + +struct SectionPass; + +struct TopLevel +{ + TopLevel( struct colm_sections *frontendSections, InputData *id, const HostLang *hostLang, + MinimizeLevel minimizeLevel, MinimizeOpt minimizeOpt ) + : + frontendSections(frontendSections), + id(id), + section(0), + pd(0), + machineSpec(0), + machineName(0), + includeDepth(0), + hostLang(hostLang), + minimizeLevel(minimizeLevel), + minimizeOpt(minimizeOpt), + + /* Should be passed into the load, somehow. */ + targetMachine(0), + searchMachine(0), + paramList(0), + success(true), + isImport(false) + { + exportContext.append( false ); + } + + struct colm_sections *frontendSections; + InputData *id; + Section *section; + SectionPass *sectionPass; + ParseData *pd; + char *machineSpec; + char *machineName; + int includeDepth; + const HostLang *hostLang; + MinimizeLevel minimizeLevel; + MinimizeOpt minimizeOpt; + std::vector<std::string> writeArgs; + + /* Should this go in the parse data? Probably. */ + Vector<bool> exportContext; + + const char *curFileName; + + const char *targetMachine; + const char *searchMachine; + + ActionParamList *paramList; + bool success; + + /* Generated and called by colm. 
*/ + void commit_reduce_forward( program_t *prg, tree_t **root, + struct pda_run *pda_run, parse_tree_t *pt ); + void read_reduce_forward( program_t *prg, FILE *file ); + + void loadMachineName( string data ); + void tryMachineDef( const InputLoc &loc, std::string name, + MachineDef *machineDef, bool isInstance ); + long tryLongScan( const InputLoc &loc, const char *data ); + void include( const InputLoc &incLoc, bool fileSpecified, string fileName, string machine ); + void reduceFile( const char *cmd, const char *inputFileName ); + + void import( const InputLoc &loc, std::string name, Literal *literal ); + void importFile( std::string fileName ); + + bool isImport; +}; + +#endif diff --git a/ragel/ril.lm b/ragel/ril.lm new file mode 100644 index 00000000..5b764b81 --- /dev/null +++ b/ragel/ril.lm @@ -0,0 +1,278 @@ +namespace host + lex + rl NL / '\n' / + + token escape + / '@' any / + + literal `={ `}= `${ `}$ `@{ `}@ + + token host_any / any / + end + + def tok + [`${ StmtList: stmt* `}$] :Stmt + | [`={ Expr: expr `}=] :Expr + | [escape] :Escape + | [host_any] :Any +end + +lex + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + rl c_comment + / '/*' ( any | NL )* :>> '*/' / + + rl cpp_comment + / '//' [^\n]* NL / + + literal `array `value `TRUE `FALSE + `while `switch `case + `if `else `offset `index + `goto `deref `entry `label `default + `host `cast `match `pat + + literal `uint `const + `s8 `s16 `s32 `s64 + `s128 `nil `export + `fallthrough `u `c `break `continue + + token ident + /( alpha | '_' ) ( alpha | digit | '_' )*/ + + token uint + / digit+ / + + token hex_number + / '0x' [0-9a-fA-F]+ / + + ignore + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + ignore / ( [ \t] | NL )+ / + + literal `$ `{ `} `= `[ `] + `- `, `. `; `( `) `: + `? `* `+ `> `< `& + `~ `! 
`!= `== `<< `>> + `+= `&& `|| `<= `>= + `@ `-= `-> `={ `${ `@{ +end + +def embedded_host + [`host `( string `, uint `) `={ TL: host::tok* host::`}=] :Expr +| [`host `( string `, uint `) `${ TL: host::tok* host::`}$] :Stmt +| [`host `( string `, uint `) `@{ TL: host::tok* host::`}@] :Bare + +def type + [ident] :Ident +| [ident ident] :Ident2 +| [`uint] :Uint +| [`s8] :S8 +| [`s16] :S16 +| [`s32] :S32 +| [`s64] :S64 +| [`s128] :S128 + +def expr_factor + [embedded_host] :EmbeddedHost +| [ident] :Ident +| [ident `[ expr `]] :ArraySub +| [ident `[ expr `] `. Field: ident] :ArraySubField +| [`offset `( ident `, expr `)] :Offset +| [`deref `( ident `, expr `)] :Deref +| [number] :Number +| [`TRUE] :True +| [`FALSE] :False +| [`nil] :Nil +| [hex_number] :HexNumber +| [string] :String +| [embedded_host `-> expr_factor] :Access +| [`( expr `)] :Paren +| [`cast `( type `) expr_factor] :Cast + +def lvalue + [embedded_host] +| [ident] +| [ident `[ expr `]] +| [ident `[ expr `] `. ident] +| [embedded_host `-> lvalue] + +def expr_factor_op + [`! 
expr_factor_op] +| [`~ expr_factor_op] +| [expr_factor] + +def expr_bitwise + [expr_bitwise `& expr_factor_op] +| [expr_factor_op] + +def expr_mult + [expr_mult `* expr_bitwise] +| [expr_bitwise] + +def add_op + [`+] | [`-] + +def expr_add + [expr_add add_op expr_mult] +| [expr_mult] + +def shift_op + [`<<] | [`>>] + +def expr_shift + [expr_shift shift_op expr_add] +| [expr_add] + +def test_op + [`<] | [`>] | [`<=] | [`>=] | + [`==] | [`!=] | [`&&] | [`||] + +def expr_test + [expr_test test_op expr_shift] +| [expr_shift] + +def expr + [expr_test] + +def sint + [uint] +| [`- uint] + +def number + [`u `( uint `)] :Unsigned +| [`c `( uint `)] :Char +| [sint] :Number + +def comma_num + [`, number] + +def num_list + [number comma_num*] +| [] + +def static_array + [`array type ident `( number `, number `) `= `{ num_list `} `;] + +def static_value + [`value type ident `= number `;] + +def break_label + [ident `: `:] + +def while_stmt + [break_label? `while `( expr `) stmt] + +def else_if_clause + [`else `if `( expr `) stmt] + +def else_clause + [`else stmt] + +def if_stmt [ + `if `( expr `) stmt + else_if_clause* else_clause? +] + +def match_stmt + [`match `( E: expr `) `{ P: pat_block* D: default_block? `}] + +def pat_block + [`pat expr `{ stmt* `}] + +def switch_stmt + [`switch `( expr `) `{ stmt* `}] + +def case_block + [`case expr `{ stmt* `}] + +def default_block + [`default `{ stmt* `}] + +def case_label + [`case expr `:] + +def goto_label + [ident `:] + +def opt_init + [`= expr] +| [] + +def opt_ptr + [`*] +| [] + +def opt_const + [`const] +| [] + +def declaration + [opt_const type ident opt_init `;] + +def index_stmt + [`index type ident opt_init`;] + +def export_stmt + [`export type ident number `;] + +def goto_stmt + Id: int + [`goto ident `;] + +def fallthrough + [`fallthrough `;] + +def break_stmt + [`break ident? `;] + +def continue_stmt + [`continue ident? 
`;] + +def block + [`{ StmtList: stmt* `}] + +def expr_stmt + [expr `;] + +def assign_op + [`=] | [`+=] | [`-=] + +def assign_stmt + [LValue: lvalue assign_op expr `;] + +def stmt + [embedded_host] +| [static_array] +| [static_value] +| [declaration] +| [index_stmt] +| [export_stmt] +| [assign_stmt] +| [expr_stmt] +| [while_stmt] +| [if_stmt] +| [match_stmt] +| [switch_stmt] +| [case_block] +| [default_block] +| [case_label] +| [goto_label] +| [goto_stmt] +| [fallthrough] +| [break_stmt] +| [continue_stmt] +| [block] + +def start + [stmt*] diff --git a/ragel/rlhc-main.lm b/ragel/rlhc-main.lm new file mode 100644 index 00000000..f9abea7e --- /dev/null +++ b/ragel/rlhc-main.lm @@ -0,0 +1,19 @@ +InputFile: str = argv->pop() +OutputFile: str = argv->pop() + +# +# Open input and parse +# +Input: stream = open( InputFile, "r" ) +parse Start: start[ Input ] +if ( !Start ) { + print( error, '\n' ) + exit(1) +} + +# +# Translate +# +Output: stream = open( OutputFile, "w" ) +trans( Output, Start ) + diff --git a/ragel/rlparse.kh b/ragel/rlparse.kh new file mode 100644 index 00000000..e077d6a2 --- /dev/null +++ b/ragel/rlparse.kh @@ -0,0 +1,148 @@ +/* + * Copyright 2001-2007 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _RLPARSE_H +#define _RLPARSE_H + +#include <iostream> +#include "avltree.h" +#include "parsedata.h" + + +/* Import scanner tokens. */ +#define IMP_Word 128 +#define IMP_Literal 129 +#define IMP_UInt 130 +#define IMP_Define 131 + +struct ParamList; + +struct TokHead +{ + TokHead *next; +}; + +struct Parser6 +{ +%%{ + parser Parser6; + + # General tokens. + token TK_Word, TK_Literal, TK_EndSection, TK_UInt, TK_Hex, + TK_Word, TK_Literal, TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon, + TK_Arrow, TK_DoubleArrow, TK_StarStar, TK_ColonEquals, TK_BarEquals, + TK_NameSep, TK_BarStar, TK_DashDash, TK_DotDotIndep; + + # Conditions. + token TK_StartCond, TK_AllCond, TK_LeavingCond; + + # State embedding actions. + token TK_Middle; + + # Global error actions. + token TK_StartGblError, TK_AllGblError, TK_FinalGblError, + TK_NotFinalGblError, TK_NotStartGblError, TK_MiddleGblError; + + # Local error actions. + token TK_StartLocalError, TK_AllLocalError, TK_FinalLocalError, + TK_NotFinalLocalError, TK_NotStartLocalError, TK_MiddleLocalError; + + # EOF Action embedding. + token TK_StartEOF, TK_AllEOF, TK_FinalEOF, TK_NotFinalEOF, TK_NotStartEOF, + TK_MiddleEOF; + + # To State Actions. + token TK_StartToState, TK_AllToState, TK_FinalToState, TK_NotFinalToState, + TK_NotStartToState, TK_MiddleToState; + + # In State Actions. 
+ token TK_StartFromState, TK_AllFromState, TK_FinalFromState, + TK_NotFinalFromState, TK_NotStartFromState, TK_MiddleFromState; + + token TK_ColonNfaOpen, TK_CloseColon, TK_ColonCondOpen, + TK_ColonCondStarOpen, TK_ColonCondPlusOpen, TK_ColonNoMaxOpen; + + # Regular expression tokens. */ + token RE_Slash, RE_SqOpen, RE_SqOpenNeg, RE_SqClose, RE_Dot, RE_Star, + RE_Dash, RE_Char; + + # Tokens specific to inline code. + token IL_WhiteSpace, IL_Comment, IL_Literal, IL_Symbol; + + # Keywords. + token KW_Machine, KW_Include, KW_Import, KW_Write, KW_Action, KW_AlphType, + KW_Range, KW_GetKey, KW_Include, KW_Write, KW_Machine, KW_InWhen, + KW_When, KW_OutWhen, KW_Eof, KW_Err, KW_Lerr, KW_To, KW_From, + KW_Export, KW_PrePush, KW_PostPop, KW_Length, KW_NfaPrePush, KW_NfaPostPop; + + # Specials in code blocks. + token KW_Break, KW_Exec, KW_Hold, KW_PChar, KW_Char, KW_Goto, KW_Call, + KW_Ret, KW_CurState, KW_TargState, KW_Entry, KW_Next, KW_Exec, + KW_Variable, KW_Access, KW_Ncall, KW_Nret, KW_Nbreak; + + token TK_SubstRef; +}%% + + %% write instance_data; + + void init(); + int parseLangEl( int type, const Token *token ); + void clear(); + + Parser6( InputData *id, const char *fileName, char *sectionName, + const InputLoc §ionLoc, const HostLang *hostLang, + MinimizeLevel minimizeLevel, + MinimizeOpt minimizeOpt ); + + int token( InputLoc &loc, int tokId, char *tokstart, int toklen ); + void tryMachineDef( const InputLoc &loc, char *name, + MachineDef *machineDef, bool isInstance ); + + /* Report an error encountered by the parser. */ + ostream &parse_error( int tokId, Token &token ); + + ParseData *pd; + + /* The name of the root section, this does not change during an include. 
*/ + char *sectionName; + const HostLang *hostLang; + + NameRef nameRef; + NameRefList nameRefList; + + Vector<bool> exportContext; + + TokHead *tokHead; + ActionParamList *paramList; + + Parser6 *prev, *next; + + void terminateParser(); + + bool parseSubstitutions; +}; + +%% write token_defs; + +void clearTokdata( Parser6 *parser ); + +#endif diff --git a/ragel/rlparse.kl b/ragel/rlparse.kl new file mode 100644 index 00000000..7f69ab3e --- /dev/null +++ b/ragel/rlparse.kl @@ -0,0 +1,1943 @@ +/* + * Copyright 2001-2016 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "rlparse.h" +#include "ragel.h" +#include "inputdata.h" +#include <iostream> +#include <errno.h> +#include <stdlib.h> + +using std::endl; + +Parser6::Parser6( InputData *id, const char *fileName, char *sectionName, + const InputLoc &sectionLoc, const HostLang *hostLang, + MinimizeLevel minimizeLevel, + MinimizeOpt minimizeOpt ) +: + sectionName(sectionName), + hostLang(hostLang), + tokHead(0), + parseSubstitutions(false) +{ + pd = new ParseData( id, std::string(sectionName), + id->nextMachineId++, sectionLoc, hostLang, minimizeLevel, minimizeOpt ); + exportContext.append( false ); + + pd->includeHistory.push_back( IncludeHistoryItem( fileName, sectionName ) ); +} + +%%{ + +parser Parser6; + +include "rlparse.kh"; + +start: section_list; + +section_list: section_list statement_list TK_EndSection; +section_list: ; + +statement_list: statement_list statement; +statement_list: ; + +statement: assignment commit; +statement: instantiation commit; +statement: nfa_union commit; +statement: action_spec commit; +statement: alphtype_spec commit; +statement: range_spec commit; +statement: getkey_spec commit; +statement: access_spec commit; +statement: variable_spec commit; +statement: export_block commit; +statement: pre_push_spec commit; +statement: post_pop_spec commit; +statement: nfa_pre_push_spec commit; +statement: nfa_post_pop_spec commit; +statement: length_spec commit; + +length_spec: + KW_Length TK_Word ';' + final { + LengthDef *lengthDef = new LengthDef( $2->data ); + pd->lengthDefList.append( lengthDef ); + + /* Generic creation of machine for instantiation and assignment. */ + MachineDef *machineDef = new MachineDef( lengthDef ); + tryMachineDef( $2->loc, $2->data, machineDef, false ); + }; + +pre_push_spec: + KW_PrePush '{' inline_block '}' + final { + if ( pd->fsmCtx->prePushExpr != 0 ) { + /* Recover by just ignoring the duplicate. 
*/ + pd->id->error($2->loc) << "pre_push code already defined" << endl; + } + + pd->fsmCtx->prePushExpr = new InlineBlock( $2->loc, $3->inlineList ); + }; + + +post_pop_spec: + KW_PostPop '{' inline_block '}' + final { + if ( pd->fsmCtx->postPopExpr != 0 ) { + /* Recover by just ignoring the duplicate. */ + pd->id->error($2->loc) << "post_pop code already defined" << endl; + } + + pd->fsmCtx->postPopExpr = new InlineBlock( $2->loc, $3->inlineList ); + }; + +nfa_pre_push_spec: + KW_NfaPrePush '{' inline_block '}' + final { + if ( pd->fsmCtx->nfaPrePushExpr != 0 ) { + /* Recover by just ignoring the duplicate. */ + pd->id->error($2->loc) << "nfa_pre_push code already defined" << endl; + } + + pd->fsmCtx->nfaPrePushExpr = new InlineBlock( $2->loc, $3->inlineList ); + }; + +nfa_post_pop_spec: + KW_NfaPostPop '{' inline_block '}' + final { + if ( pd->fsmCtx->nfaPostPopExpr != 0 ) { + /* Recover by just ignoring the duplicate. */ + pd->id->error($2->loc) << "nfa_post_pop code already defined" << endl; + } + + pd->fsmCtx->nfaPostPopExpr = new InlineBlock( $2->loc, $3->inlineList ); + }; + +export_open: KW_Export + final { + exportContext.append( true ); + }; + +nonterm opt_export +{ + bool isSet; +}; + +opt_export: export_open final { $$->isSet = true; }; +opt_export: final { $$->isSet = false; }; + +export_block: export_open '{' statement_list '}' + final { + exportContext.remove( exportContext.length()-1 ); + }; + +assignment: + opt_export machine_name '=' join ';' final { + /* Main machine must be an instance. */ + bool isInstance = false; + if ( strcmp($2->token.data, mainMachine) == 0 ) { + pd->id->warning($2->token.loc) << + "main machine will be implicitly instantiated" << endl; + isInstance = true; + } + + /* Generic creation of machine for instantiation and assignment. 
*/ + MachineDef *machineDef = new MachineDef( $4->join ); + tryMachineDef( $2->token.loc, $2->token.data, machineDef, isInstance ); + + if ( $1->isSet ) + exportContext.remove( exportContext.length()-1 ); + + $4->join->loc = $3->loc; + }; + +instantiation: + opt_export machine_name TK_ColonEquals join_or_lm ';' final { + /* Generic creation of machine for instantiation and assignment. */ + tryMachineDef( $2->token.loc, $2->token.data, $4->machineDef, true ); + + if ( $1->isSet ) + exportContext.remove( exportContext.length()-1 ); + + /* Pass a location to join_or_lm */ + if ( $4->machineDef->join != 0 ) + $4->machineDef->join->loc = $3->loc; + }; + +nonterm nfa_round_spec +{ + long depth; + long grouping; +}; + +nfa_round_spec: + TK_UInt ',' TK_UInt + final { + // Convert the priority number to a long. Check for overflow. + errno = 0; + $$->depth = strtol( $1->data, 0, 10 ); + if ( $$->depth == LONG_MAX && errno == ERANGE ) + pd->id->error($1->loc) << "rounds " << $1->data << " overflows" << endl; + + $$->grouping = strtol( $3->data, 0, 10 ); + if ( $$->grouping == LONG_MAX && errno == ERANGE ) + pd->id->error($3->loc) << "grouping " << $3->data << " overflows" << endl; + }; + +nonterm nfa_round_list +{ + NfaRoundVect *roundsList; +}; + +nfa_round_list: + nfa_round_list ',' nfa_round_spec + final { + $$->roundsList = $1->roundsList; + $$->roundsList->append( + NfaRound( $3->depth, $3->grouping ) ); + }; + +nfa_round_list: + nfa_round_spec + final { + $$->roundsList = new NfaRoundVect; + $$->roundsList->append( + NfaRound( $1->depth, $1->grouping ) ); + }; + +nonterm nfa_rounds +{ + NfaRoundVect *roundsList; +}; + +nfa_rounds: + '(' nfa_round_list ')' + final { + $$->roundsList = $2->roundsList; + }; + +nonterm nfa_expr +{ + NfaUnion *nfaUnion; +}; + +nfa_expr: + nfa_expr '|' term_short final { + $$->nfaUnion = $1->nfaUnion; + $$->nfaUnion->terms.append( $3->term ); + }; +nfa_expr: + term_short final { + $$->nfaUnion = new NfaUnion(); + $$->nfaUnion->terms.append( 
$1->term ); + }; + +nfa_union: + machine_name TK_BarEquals nfa_rounds nfa_expr ';' final { + $4->nfaUnion->roundsList = $3->roundsList; + MachineDef *machineDef = new MachineDef( $4->nfaUnion ); + + /* Generic creation of machine for instantiation and assignment. */ + tryMachineDef( $1->token.loc, $1->token.data, machineDef, true ); + }; + + +type token_type +{ + Token token; +}; + +nonterm machine_name uses token_type; + +machine_name: + TK_Word final { + /* Make/get the priority key. The name may have already been referenced + * and therefore exist. */ + PriorDictEl *priorDictEl; + if ( pd->priorDict.insert( $1->data, pd->fsmCtx->nextPriorKey, &priorDictEl ) ) + pd->fsmCtx->nextPriorKey += 1; + pd->curDefPriorKey = priorDictEl->value; + + /* Make/get the local error key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + pd->curDefLocalErrKey = localErrDictEl->value; + + $$->token = *$1; + }; + +nonterm action_param +{ + ActionParam *param; +}; + +action_param: + TK_Word + final { + $$->param = new ActionParam( $1->data ); + }; + +nonterm action_param_list +{ + ActionParamList *paramList; +}; + +action_param_list: + action_param_list ',' action_param + final { + $$->paramList = $1->paramList; + $$->paramList->append( $3->param ); + }; + +action_param_list: + action_param + final { + $$->paramList = new ActionParamList; + $$->paramList->append( $1->param ); + }; + +nonterm opt_action_param_list uses action_param_list; + +opt_action_param_list: + action_param_list + final { + $$->paramList = $1->paramList; + }; + +opt_action_param_list: + final { + $$->paramList = new ActionParamList; + }; + +nonterm opt_action_params uses action_param_list; + +opt_action_params: + '(' opt_action_param_list ')' + try { + parseSubstitutions = true; + } + final { + $$->paramList = $2->paramList; + paramList = $2->paramList; + }; + +opt_action_params: + final { + $$->paramList = 0; + }; 
+ +action_spec: + KW_Action TK_Word opt_action_params '{' inline_block '}' + final { + if ( pd->actionDict.find( $2->data ) ) { + /* Recover by just ignoring the duplicate. */ + pd->id->error($2->loc) << "action \"" << $2->data << "\" already defined" << endl; + } + else { + /* Add the action to the list of actions. */ + Action *newAction = new Action( $4->loc, $2->data, + $5->inlineList, pd->fsmCtx->nextCondId++ ); + + /* Insert to list and dict. */ + pd->fsmCtx->actionList.append( newAction ); + pd->actionDict.insert( newAction ); + + newAction->paramList = $3->paramList; + if ( $3->paramList != 0 ) + newAction->argListMap = new ActionArgListMap; + } + parseSubstitutions = false; + }; + +# Specifies the data type of the input alphabet. One or two words followed by a +# semi-colon. +alphtype_spec: + KW_AlphType TK_Word TK_Word ';' final { + if ( ! pd->setAlphType( $1->loc, hostLang, $2->data, $3->data ) ) { + // Recover by ignoring the alphtype statement. + pd->id->error($2->loc) << "\"" << $2->data << + " " << $3->data << "\" is not a valid alphabet type" << endl; + } + }; + +alphtype_spec: + KW_AlphType TK_Word ';' final { + if ( ! pd->setAlphType( $1->loc, hostLang, $2->data ) ) { + // Recover by ignoring the alphtype statement. + pd->id->error($2->loc) << "\"" << $2->data << + "\" is not a valid alphabet type" << endl; + } + }; + +# Specifies a range to assume that the input characters will fall into. +range_spec: + KW_Range alphabet_num alphabet_num ';' final { + // Save the upper and lower ends of the range and emit the line number. 
+ pd->lowerNum = $2->token.data; + pd->upperNum = $3->token.data; + pd->rangeLowLoc = $2->token.loc; + pd->rangeHighLoc = $3->token.loc; + }; + +getkey_spec: + KW_GetKey inline_expr ';' final { + pd->fsmCtx->getKeyExpr = $2->inlineList; + }; + +access_spec: + KW_Access inline_expr ';' final { + pd->fsmCtx->accessExpr = $2->inlineList; + }; + +variable_spec: + KW_Variable opt_whitespace TK_Word inline_expr ';' final { + /* FIXME: Need to implement the rest of this. */ + bool wasSet = pd->setVariable( $3->data, $4->inlineList ); + if ( !wasSet ) + pd->id->error($3->loc) << "bad variable name" << endl; + }; + +opt_whitespace: opt_whitespace IL_WhiteSpace; +opt_whitespace: ; + +# +# Expressions +# + +nonterm join_or_lm +{ + MachineDef *machineDef; +}; + +join_or_lm: + join final { + $$->machineDef = new MachineDef( $1->join ); + }; +join_or_lm: + TK_BarStar lm_part_list '*' '|' final { + /* Create a new factor going to a longest match structure. Record + * in the parse data that we have a longest match. */ + LongestMatch *lm = new LongestMatch( $1->loc, $2->lmPartList ); + pd->lmList.append( lm ); + for ( LmPartList::Iter lmp = *($2->lmPartList); lmp.lte(); lmp++ ) + lmp->longestMatch = lm; + $$->machineDef = new MachineDef( lm ); + }; + +nonterm lm_part_list +{ + LmPartList *lmPartList; +}; + +lm_part_list: + lm_part_list longest_match_part + final { + if ( $2->lmPart != 0 ) + $1->lmPartList->append( $2->lmPart ); + $$->lmPartList = $1->lmPartList; + }; +lm_part_list: + longest_match_part + final { + /* Create a new list with the part. 
*/ + $$->lmPartList = new LmPartList; + if ( $1->lmPart != 0 ) + $$->lmPartList->append( $1->lmPart ); + }; + +nonterm longest_match_part +{ + LongestMatchPart *lmPart; +}; + +longest_match_part: + action_spec commit + final { + $$->lmPart = 0; + }; +longest_match_part: + assignment commit + final { + $$->lmPart = 0; + }; +longest_match_part: + join opt_lm_part_action ';' commit + final { + $$->lmPart = 0; + Action *action = $2->action; + if ( action != 0 ) + action->isLmAction = true; + $$->lmPart = new LongestMatchPart( $1->join, action, + $3->loc, pd->nextLongestMatchId++ ); + + /* Provide a location to join. Unfortunately we don't + * have the start of the join as in other occurrences. Use the end. */ + $1->join->loc = $3->loc; + }; + +nonterm opt_lm_part_action +{ + Action *action; +}; + +opt_lm_part_action: + TK_DoubleArrow action_embed final { + $$->action = $2->action; + }; +opt_lm_part_action: + action_embed_block final { + $$->action = $1->action; + }; +opt_lm_part_action: + final { + $$->action = 0; + }; + + +nonterm join +{ + Join *join; +}; + +join: + join ',' expression final { + /* Append the expression to the list and return it. 
*/ + $1->join->exprList.append( $3->expression ); + $$->join = $1->join; + }; +join: + expression final { + $$->join = new Join( $1->expression ); + }; + +nonterm expression +{ + Expression *expression; +}; + +expression: + expression '|' term_short final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::OrType ); + }; +expression: + expression '&' term_short final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::IntersectType ); + }; +expression: + expression '-' term_short final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::SubtractType ); + }; +expression: + expression TK_DashDash term_short final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::StrongSubtractType ); + }; +expression: + term_short final { + $$->expression = new Expression( $1->term ); + }; + +# This is where we resolve the ambiguity involving -. By default ragel tries to +# do a longest match, which gives precedence to a concatenation because it is +# innermost. What we need is to force term into a shortest match so that when - +# is seen it doesn't try to extend term with a concatenation, but ends term and +# goes for a subtraction. +# +# The shortest tag overrides the default longest match action ordering strategy +# and instead forces a shortest match strategy. We wrap the term production in +# a new nonterminal 'term_short' to guarantee the shortest match behaviour. + +shortest term_short; +nonterm term_short +{ + Term *term; +}; + +term_short: + term final { + $$->term = $1->term; + }; + +nonterm term +{ + Term *term; +}; + +term: + term factor_with_label final { + $$->term = new Term( $1->term, $2->factorWithAug ); + }; +term: + term '.' 
factor_with_label final { + $$->term = new Term( $1->term, $3->factorWithAug ); + }; +term: + term TK_ColonGt factor_with_label final { + $$->term = new Term( $1->term, $3->factorWithAug, Term::RightStartType ); + }; +term: + term TK_ColonGtGt factor_with_label final { + $$->term = new Term( $1->term, $3->factorWithAug, Term::RightFinishType ); + }; +term: + term TK_LtColon factor_with_label final { + $$->term = new Term( $1->term, + $3->factorWithAug, Term::LeftType ); + }; +term: + factor_with_label final { + $$->term = new Term( $1->factorWithAug ); + }; + +nonterm factor_with_label +{ + FactorWithAug *factorWithAug; +}; + +factor_with_label: + TK_Word ':' factor_with_label final { + /* Add the label to the list and pass the factor up. */ + $3->factorWithAug->labels.insert( $3->factorWithAug->labels.begin(), Label($1->loc, $1->data) ); + $$->factorWithAug = $3->factorWithAug; + }; +factor_with_label: + factor_with_ep final { + $$->factorWithAug = $1->factorWithAug; + }; + +nonterm factor_with_ep +{ + FactorWithAug *factorWithAug; +}; + +factor_with_ep: + factor_with_ep TK_Arrow local_state_ref final { + /* Add the target to the list and return the factor object. */ + $1->factorWithAug->epsilonLinks.append( EpsilonLink( $2->loc, new NameRef(nameRef) ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_ep: + factor_with_aug final { + $$->factorWithAug = $1->factorWithAug; + }; + +nonterm factor_with_aug +{ + FactorWithAug *factorWithAug; +}; + +factor_with_aug: + factor_with_aug aug_type_base action_embed final { + /* Append the action to the factorWithAug, record the reference from + * factorWithAug to the action and pass up the factorWithAug. */ + $1->factorWithAug->actions.append( + ParserAction( $2->loc, $2->augType, 0, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_base priority_aug final { + /* Append the priority, keyed by the current definition, and pass it up. 
*/ + $1->factorWithAug->priorityAugs.append( + PriorityAug( $2->augType, pd->curDefPriorKey, $3->priorityNum ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_base '(' priority_name ',' priority_aug ')' final { + /* Append the priority under the explicitly given name. */ + $1->factorWithAug->priorityAugs.append( + PriorityAug( $2->augType, $4->priorityName, $6->priorityNum ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_cond action_embed final { + $1->factorWithAug->conditions.append( ConditionTest( $2->loc, + $2->augType, $3->action, true ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_cond '!' action_embed final { + $1->factorWithAug->conditions.append( ConditionTest( $2->loc, + $2->augType, $4->action, false ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_to_state action_embed final { + /* Append the action, pass it up. */ + $1->factorWithAug->actions.append( ParserAction( $2->loc, + $2->augType, 0, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_from_state action_embed final { + /* Append the action, pass it up. */ + $1->factorWithAug->actions.append( ParserAction( $2->loc, + $2->augType, 0, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_eof action_embed final { + /* Append the action, pass it up. */ + $1->factorWithAug->actions.append( ParserAction( $2->loc, + $2->augType, 0, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_gbl_error action_embed final { + /* Append the action to the factorWithAug, record the reference from + * factorWithAug to the action and pass up the factorWithAug. 
*/ + $1->factorWithAug->actions.append( ParserAction( $2->loc, + $2->augType, pd->curDefLocalErrKey, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_local_error action_embed final { + /* Append the action to the factorWithAug, record the reference from + * factorWithAug to the action and pass up the factorWithAug. */ + $1->factorWithAug->actions.append( ParserAction( $2->loc, + $2->augType, pd->curDefLocalErrKey, $3->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_aug aug_type_local_error '(' local_err_name ',' action_embed ')' final { + /* Append the action to the factorWithAug, record the reference from + * factorWithAug to the action and pass up the factorWithAug. */ + $1->factorWithAug->actions.append( ParserAction( $2->loc, + $2->augType, $4->error_name, $6->action ) ); + $$->factorWithAug = $1->factorWithAug; + }; +factor_with_aug: + factor_with_rep final { + $$->factorWithAug = new FactorWithAug( $1->factorWithRep ); + }; + +type aug_type +{ + ParserLoc loc; + AugType augType; +}; + +# Classes of transitions on which to embed actions or change priorities. +nonterm aug_type_base uses aug_type; + +aug_type_base: '@' final { $$->loc = $1->loc; $$->augType = at_finish; }; +aug_type_base: '%' final { $$->loc = $1->loc; $$->augType = at_leave; }; +aug_type_base: '$' final { $$->loc = $1->loc; $$->augType = at_all; }; +aug_type_base: '>' final { $$->loc = $1->loc; $$->augType = at_start; }; + +# Embedding conditions. 
+nonterm aug_type_cond uses aug_type; + +aug_type_cond: TK_StartCond final { $$->loc = $1->loc; $$->augType = at_start; }; +aug_type_cond: '>' KW_When final { $$->loc = $1->loc; $$->augType = at_start; }; +aug_type_cond: TK_AllCond final { $$->loc = $1->loc; $$->augType = at_all; }; +aug_type_cond: '$' KW_When final { $$->loc = $1->loc; $$->augType = at_all; }; +aug_type_cond: TK_LeavingCond final { $$->loc = $1->loc; $$->augType = at_leave; }; +aug_type_cond: '%' KW_When final { $$->loc = $1->loc; $$->augType = at_leave; }; +aug_type_cond: KW_When final { $$->loc = $1->loc; $$->augType = at_all; }; +aug_type_cond: KW_InWhen final { $$->loc = $1->loc; $$->augType = at_start; }; +aug_type_cond: KW_OutWhen final { $$->loc = $1->loc; $$->augType = at_leave; }; + +# +# To state actions. +# + +nonterm aug_type_to_state uses aug_type; + +aug_type_to_state: TK_StartToState + final { $$->loc = $1->loc; $$->augType = at_start_to_state; }; +aug_type_to_state: '>' KW_To + final { $$->loc = $1->loc; $$->augType = at_start_to_state; }; + +aug_type_to_state: TK_NotStartToState + final { $$->loc = $1->loc; $$->augType = at_not_start_to_state; }; +aug_type_to_state: '<' KW_To + final { $$->loc = $1->loc; $$->augType = at_not_start_to_state; }; + +aug_type_to_state: TK_AllToState + final { $$->loc = $1->loc; $$->augType = at_all_to_state; }; +aug_type_to_state: '$' KW_To + final { $$->loc = $1->loc; $$->augType = at_all_to_state; }; + +aug_type_to_state: TK_FinalToState + final { $$->loc = $1->loc; $$->augType = at_final_to_state; }; +aug_type_to_state: '%' KW_To + final { $$->loc = $1->loc; $$->augType = at_final_to_state; }; + +aug_type_to_state: TK_NotFinalToState + final { $$->loc = $1->loc; $$->augType = at_not_final_to_state; }; +aug_type_to_state: '@' KW_To + final { $$->loc = $1->loc; $$->augType = at_not_final_to_state; }; + +aug_type_to_state: TK_MiddleToState + final { $$->loc = $1->loc; $$->augType = at_middle_to_state; }; +aug_type_to_state: TK_Middle KW_To + final { 
$$->loc = $1->loc; $$->augType = at_middle_to_state; }; + +# +# From state actions. +# + +nonterm aug_type_from_state uses aug_type; + +aug_type_from_state: TK_StartFromState + final { $$->loc = $1->loc; $$->augType = at_start_from_state; }; +aug_type_from_state: '>' KW_From + final { $$->loc = $1->loc; $$->augType = at_start_from_state; }; + +aug_type_from_state: TK_NotStartFromState + final { $$->loc = $1->loc; $$->augType = at_not_start_from_state; }; +aug_type_from_state: '<' KW_From + final { $$->loc = $1->loc; $$->augType = at_not_start_from_state; }; + +aug_type_from_state: TK_AllFromState + final { $$->loc = $1->loc; $$->augType = at_all_from_state; }; +aug_type_from_state: '$' KW_From + final { $$->loc = $1->loc; $$->augType = at_all_from_state; }; + +aug_type_from_state: TK_FinalFromState + final { $$->loc = $1->loc; $$->augType = at_final_from_state; }; +aug_type_from_state: '%' KW_From + final { $$->loc = $1->loc; $$->augType = at_final_from_state; }; + +aug_type_from_state: TK_NotFinalFromState + final { $$->loc = $1->loc; $$->augType = at_not_final_from_state; }; +aug_type_from_state: '@' KW_From + final { $$->loc = $1->loc; $$->augType = at_not_final_from_state; }; + +aug_type_from_state: TK_MiddleFromState + final { $$->loc = $1->loc; $$->augType = at_middle_from_state; }; +aug_type_from_state: TK_Middle KW_From + final { $$->loc = $1->loc; $$->augType = at_middle_from_state; }; + +# +# Eof state actions. 
+# + +nonterm aug_type_eof uses aug_type; + +aug_type_eof: TK_StartEOF + final { $$->loc = $1->loc; $$->augType = at_start_eof; }; +aug_type_eof: '>' KW_Eof + final { $$->loc = $1->loc; $$->augType = at_start_eof; }; + +aug_type_eof: TK_NotStartEOF + final { $$->loc = $1->loc; $$->augType = at_not_start_eof; }; +aug_type_eof: '<' KW_Eof + final { $$->loc = $1->loc; $$->augType = at_not_start_eof; }; + +aug_type_eof: TK_AllEOF + final { $$->loc = $1->loc; $$->augType = at_all_eof; }; +aug_type_eof: '$' KW_Eof + final { $$->loc = $1->loc; $$->augType = at_all_eof; }; + +aug_type_eof: TK_FinalEOF + final { $$->loc = $1->loc; $$->augType = at_final_eof; }; +aug_type_eof: '%' KW_Eof + final { $$->loc = $1->loc; $$->augType = at_final_eof; }; + +aug_type_eof: TK_NotFinalEOF + final { $$->loc = $1->loc; $$->augType = at_not_final_eof; }; +aug_type_eof: '@' KW_Eof + final { $$->loc = $1->loc; $$->augType = at_not_final_eof; }; + +aug_type_eof: TK_MiddleEOF + final { $$->loc = $1->loc; $$->augType = at_middle_eof; }; +aug_type_eof: TK_Middle KW_Eof + final { $$->loc = $1->loc; $$->augType = at_middle_eof; }; + +# +# Global error actions. 
+# + +nonterm aug_type_gbl_error uses aug_type; + +aug_type_gbl_error: TK_StartGblError + final { $$->loc = $1->loc; $$->augType = at_start_gbl_error; }; +aug_type_gbl_error: '>' KW_Err + final { $$->loc = $1->loc; $$->augType = at_start_gbl_error; }; + +aug_type_gbl_error: TK_NotStartGblError + final { $$->loc = $1->loc; $$->augType = at_not_start_gbl_error; }; +aug_type_gbl_error: '<' KW_Err + final { $$->loc = $1->loc; $$->augType = at_not_start_gbl_error; }; + +aug_type_gbl_error: TK_AllGblError + final { $$->loc = $1->loc; $$->augType = at_all_gbl_error; }; +aug_type_gbl_error: '$' KW_Err + final { $$->loc = $1->loc; $$->augType = at_all_gbl_error; }; + +aug_type_gbl_error: TK_FinalGblError + final { $$->loc = $1->loc; $$->augType = at_final_gbl_error; }; +aug_type_gbl_error: '%' KW_Err + final { $$->loc = $1->loc; $$->augType = at_final_gbl_error; }; + +aug_type_gbl_error: TK_NotFinalGblError + final { $$->loc = $1->loc; $$->augType = at_not_final_gbl_error; }; +aug_type_gbl_error: '@' KW_Err + final { $$->loc = $1->loc; $$->augType = at_not_final_gbl_error; }; + +aug_type_gbl_error: TK_MiddleGblError + final { $$->loc = $1->loc; $$->augType = at_middle_gbl_error; }; +aug_type_gbl_error: TK_Middle KW_Err + final { $$->loc = $1->loc; $$->augType = at_middle_gbl_error; }; + + +# +# Local error actions. 
+# + +nonterm aug_type_local_error uses aug_type; + +aug_type_local_error: TK_StartLocalError + final { $$->loc = $1->loc; $$->augType = at_start_local_error; }; +aug_type_local_error: '>' KW_Lerr + final { $$->loc = $1->loc; $$->augType = at_start_local_error; }; + +aug_type_local_error: TK_NotStartLocalError + final { $$->loc = $1->loc; $$->augType = at_not_start_local_error; }; +aug_type_local_error: '<' KW_Lerr + final { $$->loc = $1->loc; $$->augType = at_not_start_local_error; }; + +aug_type_local_error: TK_AllLocalError + final { $$->loc = $1->loc; $$->augType = at_all_local_error; }; +aug_type_local_error: '$' KW_Lerr + final { $$->loc = $1->loc; $$->augType = at_all_local_error; }; + +aug_type_local_error: TK_FinalLocalError + final { $$->loc = $1->loc; $$->augType = at_final_local_error; }; +aug_type_local_error: '%' KW_Lerr + final { $$->loc = $1->loc; $$->augType = at_final_local_error; }; + +aug_type_local_error: TK_NotFinalLocalError + final { $$->loc = $1->loc; $$->augType = at_not_final_local_error; }; +aug_type_local_error: '@' KW_Lerr + final { $$->loc = $1->loc; $$->augType = at_not_final_local_error; }; + +aug_type_local_error: TK_MiddleLocalError + final { $$->loc = $1->loc; $$->augType = at_middle_local_error; }; +aug_type_local_error: TK_Middle KW_Lerr + final { $$->loc = $1->loc; $$->augType = at_middle_local_error; }; + + +type action_ref +{ + Action *action; +}; + +# Different ways to embed actions. A TK_Word is reference to an action given by +# the user as a statement in the fsm specification. An action can also be +# specified immediately. 
+nonterm action_embed uses action_ref; + +action_embed: named_action_ref final { $$->action = $1->action; }; +action_embed: '(' named_action_ref ')' final { $$->action = $2->action; }; +action_embed: action_embed_block final { $$->action = $1->action; }; + +nonterm action_arg_list +{ + ActionArgList *argList; +}; + +action_arg_list: + action_arg_list ',' action_embed + final { + $$->argList = $1->argList; + $$->argList->append( $3->action ); + }; +action_arg_list: + action_embed + final { + $$->argList = new ActionArgList; + $$->argList->append( $1->action ); + }; + +nonterm opt_action_arg_list uses action_arg_list; + +opt_action_arg_list: + action_arg_list + final + { + $$->argList = $1->argList; + }; +opt_action_arg_list: + final { + $$->argList = new ActionArgList; + }; + +nonterm named_action_ref uses action_ref; + +# Reference to a named action that takes no parameters. +named_action_ref: + TK_Word + try { + /* Look the name up in the actionDict. */ + Action *action = pd->actionDict.find( $1->data ); + if ( action != 0 ) { + /* Pass up the action element */ + $$->action = action; + /* An action with a parameter list is not matched by this production. */ + if ( action->paramList != 0 ) + reject(); + } + else { + /* Will recover by returning null as the action. */ + pd->id->error($1->loc) << "action lookup of \"" << $1->data << "\" failed" << endl; + $$->action = 0; + } + }; +# Reference to a named action with an argument list. The try action resolves +# the name; the final action specializes the action for the supplied args. +named_action_ref: + TK_Word '(' opt_action_arg_list ')' + try { + /* Look the name up in the actionDict. */ + Action *action = pd->actionDict.find( $1->data ); + if ( action != 0 ) { + + /* An action without a parameter list is not matched by this production. */ + if ( action->paramList == 0 ) + reject(); + + /* + * Store the action we resolved. In the final action we will + * convert this to the specialized action. Can't do this here since + * it is a try action and we have not processed the args list (all + * done by final actions ). + */ + $$->action = action; + } + else { + /* Will recover by returning null as the action. */ + pd->id->error($1->loc) << "action lookup of \"" << $1->data << "\" failed" << endl; + $$->action = 0; + } + }
+ final { + if ( $$->action == 0 ) { + /* The lookup failed in the try action and the error has already been + * reported. Keep the null action and release the unused arg list + * rather than dereferencing a null Action. */ + delete $3->argList; + } + else { + /* Make sure the number of arguments lines up with the parameters. */ + if ( $3->argList->length() != $$->action->paramList->length() ) { + pd->id->error($1->loc) << "wrong number of action " + "arguments for \"" << $1->data << "\"" << endl; + } + + /* Now we need to specialize using the supplied args. We can only + * present an Action* to fsmcodegen. */ + ActionArgListMapEl *el = $$->action->argListMap->find( $3->argList ); + if ( el == 0 ) { + /* Allocate an action representing this specialization. */ + Action *specAction = Action::cons( $1->loc, $$->action, + $3->argList, pd->fsmCtx->nextCondId++ ); + pd->fsmCtx->actionList.append( specAction ); + + el = $$->action->argListMap->insert( $3->argList, specAction ); + } + else { + /* A specialization for these args already exists, so the new arg + * list is not needed. */ + delete $3->argList; + } + + $$->action = el->value; + } + }; + +nonterm action_embed_block uses action_ref; + +action_embed_block: + '{' inline_block '}' final { + /* Create the action, add it to the list and pass up. */ + Action *newAction = new Action( $1->loc, std::string(), + $2->inlineList, pd->fsmCtx->nextCondId++ ); + pd->fsmCtx->actionList.append( newAction ); + $$->action = newAction; + }; + +nonterm priority_name +{ + int priorityName; +}; + +# A specified priority name. Looks up the name in the current priority +# dictionary. +priority_name: + TK_Word final { + // Lookup/create the priority key. + PriorDictEl *priorDictEl; + if ( pd->priorDict.insert( $1->data, pd->fsmCtx->nextPriorKey, &priorDictEl ) ) + pd->fsmCtx->nextPriorKey += 1; + + // Use the inserted/found priority key. + $$->priorityName = priorDictEl->value; + }; + +nonterm priority_aug +{ + int priorityNum; +}; + +# Priority change specs. +priority_aug: + priority_aug_num final { + char *data = $1->token.data; + if ( $1->pos || $1->neg ) { + data = new char[$1->token.length + 2]; + data[0] = $1->pos ? '+' : '-'; + memcpy( data + 1, $1->token.data, $1->token.length ); + data[$1->token.length + 1] = 0; + } + + // Convert the priority number to a long. Check for overflow.
+ errno = 0; + long aug = strtol( data, 0, 10 ); + if ( ( errno == ERANGE && aug == LONG_MAX ) || aug > INT_MAX ) { + /* Priority number too large for the int it is stored in. Recover by + * setting the priority to 0. */ + pd->id->error($1->token.loc) << "priority number " << data << + " overflows" << endl; + $$->priorityNum = 0; + } + else if ( ( errno == ERANGE && aug == LONG_MIN ) || aug < INT_MIN ) { + /* Priority number too large in the negative direction. Recover by using 0. */ + pd->id->error($1->token.loc) << "priority number " << data << + " underflows" << endl; + $$->priorityNum = 0; + } + else { + /* Fits in an int: no overflow or underflow. */ + $$->priorityNum = aug; + } + + /* The signed copy was heap allocated only when a sign was present. */ + if ( $1->pos || $1->neg ) + delete[] data; + }; + +nonterm priority_aug_num +{ + bool neg; + bool pos; + Token token; +}; + + +priority_aug_num: + TK_UInt final { + $$->pos = false; + $$->neg = false; + $$->token = *$1; + }; +priority_aug_num: + '+' TK_UInt final { + $$->pos = true; + $$->neg = false; + $$->token.set( $2->data, $2->length, $1->loc ); + }; +priority_aug_num: + '-' TK_UInt final { + $$->pos = false; + $$->neg = true; + $$->token.set( $2->data, $2->length, $1->loc ); + }; + +nonterm local_err_name +{ + int error_name; +}; + +local_err_name: + TK_Word final { + /* Lookup/create the local error key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + + /* Use the inserted/found local error key. */ + $$->error_name = localErrDictEl->value; + }; + + + +# The fourth level of precedence. These are the trailing unary operators that +# allow for repetition. + +nonterm factor_with_rep +{ + FactorWithRep *factorWithRep; +}; + +factor_with_rep: + factor_with_rep '*' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::StarType ); + }; +factor_with_rep: + factor_with_rep TK_StarStar final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::StarStarType ); + }; +factor_with_rep: + factor_with_rep '?'
final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::OptionalType ); + }; +factor_with_rep: + factor_with_rep '+' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::PlusType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + $3->rep, 0, FactorWithRep::ExactType ); + }; +factor_with_rep: + factor_with_rep '{' ',' factor_rep_num '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, $4->rep, FactorWithRep::MaxType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num ',' '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + $3->rep, 0, FactorWithRep::MinType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num ',' factor_rep_num '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + $3->rep, $5->rep, FactorWithRep::RangeType ); + }; +factor_with_rep: + factor_with_neg final { + $$->factorWithRep = new FactorWithRep( $1->factorWithNeg ); + }; + +nonterm factor_rep_num +{ + int rep; +}; + +factor_rep_num: + TK_UInt final { + // Convert the repetition number to a long. Check for overflow. + errno = 0; + long rep = strtol( $1->data, 0, 10 ); + if ( ( errno == ERANGE && rep == LONG_MAX ) || rep > INT_MAX ) { + // Repetition too large for the int it is stored in. Recover by returning repetition 1. + pd->id->error($1->loc) << "repetition number " << $1->data << " overflows" << endl; + $$->rep = 1; + } + else { + // Token is a TK_UInt, so it cannot be negative and no underflow check is needed. + $$->rep = rep; + } + }; + + +# +# The fifth level up in precedence. Negation. +# + +nonterm factor_with_neg +{ + FactorWithNeg *factorWithNeg; +}; + +factor_with_neg: + '!'
factor_with_neg final { + $$->factorWithNeg = new FactorWithNeg( $1->loc, + $2->factorWithNeg, FactorWithNeg::NegateType ); + }; +factor_with_neg: + '^' factor_with_neg final { + $$->factorWithNeg = new FactorWithNeg( $1->loc, + $2->factorWithNeg, FactorWithNeg::CharNegateType ); + }; +factor_with_neg: + factor final { + $$->factorWithNeg = new FactorWithNeg( $1->factor ); + }; + +nonterm factor +{ + Factor *factor; +}; + +factor: + TK_Literal final { + /* Create a new factor node going to a concat literal. */ + $$->factor = new Factor( new Literal( $1->loc, false, $1->data, + $1->length, Literal::LitString ) ); + }; +factor: + alphabet_num final { + /* Create a new factor node going to a literal number. */ + $$->factor = new Factor( new Literal( $1->token.loc, $1->neg, + $1->token.data, $1->token.length, Literal::Number ) ); + }; +factor: + TK_Word final { + /* Find the named graph. */ + GraphDictEl *gdNode = pd->graphDict.find( $1->data ); + if ( gdNode == 0 ) { + /* Recover by returning null as the factor node. */ + pd->id->error($1->loc) << "graph lookup of \"" << $1->data << "\" failed" << endl; + $$->factor = 0; + } + else if ( gdNode->isInstance ) { + /* Recover by returning null as the factor node. */ + pd->id->error($1->loc) << "references to graph instantiations not allowed " + "in expressions" << endl; + $$->factor = 0; + } + else { + /* Create a factor node that is a lookup of an expression. */ + $$->factor = new Factor( $1->loc, gdNode->value ); + } + }; +factor: + RE_SqOpen regular_expr_or_data RE_SqClose final { + /* Create a new factor node going to an OR expression. */ + $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock ) ); + }; +factor: + RE_SqOpenNeg regular_expr_or_data RE_SqClose final { + /* Create a new factor node going to a negated OR expression.
*/ + $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) ); + }; +factor: + RE_Slash regular_expr RE_Slash final { + bool caseInsensitive = false; + checkLitOptions( pd->id, $3->loc, $3->data, $3->length, caseInsensitive ); + if ( caseInsensitive ) + $2->regExpr->caseInsensitive = true; + + /* Create a new factor node going to a regular exp. */ + $$->factor = new Factor( $2->regExpr ); + }; +factor: + range_lit TK_DotDot range_lit final { + /* Create a new factor node going to a range. */ + $$->factor = new Factor( new Range( $1->literal, $3->literal, false ) ); + }; +factor: + range_lit TK_DotDotIndep range_lit final { + /* Create a new factor node going to a range. */ + $$->factor = new Factor( new Range( $1->literal, $3->literal, true ) ); + }; +factor: + TK_ColonNfaOpen expression ',' action_embed ',' + action_embed ',' action_embed ',' action_embed ',' action_embed ',' + action_embed TK_CloseColon + final { + /* push, pop, ini, stay, repeat, exit */ + $$->factor = new Factor( $1->loc, pd->nextRepId++, $2->expression, + $4->action, $6->action, $8->action, $10->action, $12->action, $14->action, + Factor::NfaRep ); + }; + + +nonterm colon_cond +{ + Factor::Type type; + ParserLoc loc; +}; + +colon_cond: + TK_ColonCondOpen + final { + $$->type = Factor::CondStar; + $$->loc = $1->loc; + }; +colon_cond: + TK_ColonCondStarOpen + final { + $$->type = Factor::CondStar; + $$->loc = $1->loc; + }; +colon_cond: + TK_ColonCondPlusOpen + final { + $$->type = Factor::CondPlus; + $$->loc = $1->loc; + }; + +nonterm opt_max_arg +{ + Action *action; +}; + +opt_max_arg: + ',' action_embed + final + { + $$->action = $2->action; + + }; +opt_max_arg: + final + { + $$->action = 0; + }; + +factor: + colon_cond expression ',' action_embed ',' + action_embed ',' action_embed opt_max_arg TK_CloseColon + final { + /* ini, inc, min, max */ + $$->factor = new Factor( $1->loc, pd->nextRepId++, $2->expression, + $4->action, $6->action, $8->action, $9->action, 0, 0, 
+ $1->type ); + }; +factor: + '(' join ')' final { + /* Create a new factor going to a parenthesized join. */ + $$->factor = new Factor( $2->join ); + $2->join->loc = $1->loc; + }; + +nonterm range_lit +{ + Literal *literal; +}; + +# Literals which can be the end points of ranges. +range_lit: + TK_Literal final { + /* Range literals must have only one char. We restrict this in the parse tree. */ + $$->literal = new Literal( $1->loc, false, $1->data, + $1->length, Literal::LitString ); + }; +range_lit: + alphabet_num final { + /* Create a new literal number. */ + $$->literal = new Literal( $1->token.loc, $1->neg, + $1->token.data, $1->token.length, Literal::Number ); + }; + +nonterm alphabet_num +{ + bool neg; + Token token; +}; + +# Any form of a number that can be used as a basic machine. +alphabet_num: + TK_UInt final { + $$->neg = false; + $$->token = *$1; + }; +alphabet_num: + '-' TK_UInt final { + $$->neg = true; + $$->token.set( $2->data, $2->length, $1->loc ); + }; +alphabet_num: + TK_Hex final { + $$->neg = false; + $$->token = *$1; + }; +# +# Regular Expressions. +# + +nonterm regular_expr +{ + RegExpr *regExpr; +}; + +# Parser for regular expression fsms. Any number of expression items which +# generally gives a machine one character long or one character long starred. +regular_expr: + regular_expr regular_expr_item final { + /* An optimization to lessen the tree size. If a non-starred char is + * directly under the left side on the right and the right side is + * another non-starred char then paste them together and return the + * left side. Otherwise just put the two under a new reg exp node. */ + if ( $2->reItem->type == ReItem::Data && !$2->reItem->star && + $1->regExpr->type == RegExpr::RecurseItem && + $1->regExpr->item->type == ReItem::Data && !$1->regExpr->item->star ) + { + /* Append the right side to the right side of the left and toss the + * right side.
*/ + $1->regExpr->item->data.append( $2->reItem->data ); + delete $2->reItem; + $$->regExpr = $1->regExpr; + } + else { + $$->regExpr = new RegExpr( $1->regExpr, $2->reItem ); + } + }; +regular_expr: + final { + /* Empty production: nothing to merge, start with a default RegExpr. */ + $$->regExpr = new RegExpr(); + }; + +nonterm regular_expr_item +{ + ReItem *reItem; +}; + +# RegularExprItems can be a character spec with an optional starring of the char. +regular_expr_item: + regular_expr_char RE_Star final { + $1->reItem->star = true; + $$->reItem = $1->reItem; + }; +regular_expr_item: + regular_expr_char final { + $$->reItem = $1->reItem; + }; + +nonterm regular_expr_char +{ + ReItem *reItem; +}; + +# A character spec can be a set of characters inside of square brackets, a +# dot specifying any character or some explicitly stated character. +regular_expr_char: + RE_SqOpen regular_expr_or_data RE_SqClose final { + $$->reItem = new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock ); + }; +regular_expr_char: + RE_SqOpenNeg regular_expr_or_data RE_SqClose final { + $$->reItem = new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ); + }; +regular_expr_char: + RE_Dot final { + $$->reItem = new ReItem( $1->loc, ReItem::Dot ); + }; +regular_expr_char: + RE_Char final { + $$->reItem = new ReItem( $1->loc, $1->data, $1->length ); + }; + +# The data inside of a [] expression in a regular expression. Accepts any +# number of characters or ranges. +nonterm regular_expr_or_data +{ + ReOrBlock *reOrBlock; +}; + +regular_expr_or_data: + regular_expr_or_data regular_expr_or_char final { + /* An optimization to lessen the tree size. If an or char is directly + * under the left side on the right and the right side is another or + * char then paste them together and return the left side. Otherwise + * just put the two under a new or data node.
*/ + if ( $2->reOrItem->type == ReOrItem::Data && + $1->reOrBlock->type == ReOrBlock::RecurseItem && + $1->reOrBlock->item->type == ReOrItem::Data ) + { + /* Append the right side to right side of the left and toss the + * right side. */ + $1->reOrBlock->item->data.append( $2->reOrItem->data ); + delete $2->reOrItem; + $$->reOrBlock = $1->reOrBlock; + } + else { + /* Can't optimize, put the left and right under a new node. */ + $$->reOrBlock = new ReOrBlock( $1->reOrBlock, $2->reOrItem ); + } + }; +regular_expr_or_data: + final { + $$->reOrBlock = new ReOrBlock(); + }; + +# A single character inside of an or expression. Can either be a character or a +# set of characters. +nonterm regular_expr_or_char +{ + ReOrItem *reOrItem; +}; + +regular_expr_or_char: + RE_Char final { + $$->reOrItem = new ReOrItem( $1->loc, $1->data, $1->length ); + }; +regular_expr_or_char: + RE_Char RE_Dash RE_Char final { + $$->reOrItem = new ReOrItem( $2->loc, $1->data[0], $3->data[0] ); + }; + +# +# Inline Lists for inline host code. +# + +type inline_list +{ + InlineList *inlineList; +}; + +nonterm inline_block uses inline_list; + +inline_block: + inline_block inline_block_item + final { + /* Append the item to the list, return the list. */ + $$->inlineList = $1->inlineList; + $$->inlineList->append( $2->inlineItem ); + }; + +inline_block: + final { + /* Start with empty list. */ + $$->inlineList = new InlineList; + }; + +type inline_item +{ + InlineItem *inlineItem; +}; + +nonterm inline_block_item uses inline_item; +nonterm inline_block_interpret uses inline_item; + +inline_block_item: + inline_expr_any + final { + $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text ); + }; + +inline_block_item: + inline_block_symbol + final { + $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text ); + }; + +inline_block_item: + inline_block_interpret + final { + /* Pass the inline item up. 
*/ + $$->inlineItem = $1->inlineItem; + }; + +nonterm inline_block_symbol uses token_type; + +inline_block_symbol: ',' final { $$->token = *$1; }; +inline_block_symbol: ';' final { $$->token = *$1; }; +inline_block_symbol: '(' final { $$->token = *$1; }; +inline_block_symbol: ')' final { $$->token = *$1; }; +inline_block_symbol: '*' final { $$->token = *$1; }; +inline_block_symbol: TK_NameSep final { $$->token = *$1; }; + +# Interpreted statements in a struct block. +inline_block_interpret: + inline_expr_interpret final { + /* Pass up interpreted items of inline expressions. */ + $$->inlineItem = $1->inlineItem; + }; +inline_block_interpret: + KW_Hold ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Hold ); + }; +inline_block_interpret: + KW_Exec inline_expr ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Exec ); + $$->inlineItem->children = $2->inlineList; + }; +inline_block_interpret: + KW_Goto state_ref ';' final { + $$->inlineItem = new InlineItem( $1->loc, + new NameRef(nameRef), InlineItem::Goto ); + }; +inline_block_interpret: + KW_Goto '*' inline_expr ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::GotoExpr ); + $$->inlineItem->children = $3->inlineList; + }; +inline_block_interpret: + KW_Next state_ref ';' final { + $$->inlineItem = new InlineItem( $1->loc, new NameRef(nameRef), InlineItem::Next ); + }; +inline_block_interpret: + KW_Next '*' inline_expr ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::NextExpr ); + $$->inlineItem->children = $3->inlineList; + }; +inline_block_interpret: + KW_Call state_ref ';' final { + $$->inlineItem = new InlineItem( $1->loc, new NameRef(nameRef), InlineItem::Call ); + }; +inline_block_interpret: + KW_Call '*' inline_expr ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::CallExpr ); + $$->inlineItem->children = $3->inlineList; + }; +inline_block_interpret: + KW_Ret ';' final { + $$->inlineItem = new InlineItem( $1->loc,
InlineItem::Ret ); + }; +inline_block_interpret: + KW_Break ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Break ); + }; +inline_block_interpret: + KW_Ncall state_ref ';' final { + $$->inlineItem = new InlineItem( $1->loc, new NameRef(nameRef), + InlineItem::Ncall ); + }; +inline_block_interpret: + KW_Ncall '*' inline_expr ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::NcallExpr ); + $$->inlineItem->children = $3->inlineList; + }; +inline_block_interpret: + KW_Nret ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Nret ); + }; +inline_block_interpret: + KW_Nbreak ';' final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Nbreak ); + }; +inline_block_interpret: + TK_SubstRef final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Subst ); + + ActionParamList::Iter api = *paramList; + for ( ; api.lte(); api++ ) { + if ( (*api)->name == $1->data ) + break; + } + + if ( api.end() ) + pd->id->error( $1->loc ) << "invalid parameter reference \"$" << $1->data << "\"" << endl; + else { + $$->inlineItem->substPos = api.pos(); + } + }; + + +nonterm inline_expr uses inline_list; + +inline_expr: + inline_expr inline_expr_item + final { + $$->inlineList = $1->inlineList; + $$->inlineList->append( $2->inlineItem ); + }; +inline_expr: + final { + /* Init the list used for this expr. */ + $$->inlineList = new InlineList; + }; + +nonterm inline_expr_item uses inline_item; + +inline_expr_item: + inline_expr_any + final { + /* Return a text segment. */ + $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text ); + }; +inline_expr_item: + inline_expr_symbol + final { + /* Return a text segment, must heap alloc the text. */ + $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text ); + }; +inline_expr_item: + inline_expr_interpret + final{ + /* Pass the inline item up. 
*/ + $$->inlineItem = $1->inlineItem; + }; + +nonterm inline_expr_any uses token_type; + +inline_expr_any: IL_WhiteSpace try { $$->token = *$1; }; +inline_expr_any: IL_Comment try { $$->token = *$1; }; +inline_expr_any: IL_Literal try { $$->token = *$1; }; +inline_expr_any: IL_Symbol try { $$->token = *$1; }; +inline_expr_any: TK_UInt try { $$->token = *$1; }; +inline_expr_any: TK_Hex try { $$->token = *$1; }; +inline_expr_any: TK_Word try { $$->token = *$1; }; + +# Anything in a ExecValExpr that is not dynamically allocated. This includes +# all special symbols caught in inline code except the semi. + +nonterm inline_expr_symbol uses token_type; + +inline_expr_symbol: ',' try { $$->token = *$1; }; +inline_expr_symbol: '(' try { $$->token = *$1; }; +inline_expr_symbol: ')' try { $$->token = *$1; }; +inline_expr_symbol: '*' try { $$->token = *$1; }; +inline_expr_symbol: TK_NameSep try { $$->token = *$1; }; + +nonterm inline_expr_interpret uses inline_item; + +inline_expr_interpret: + KW_PChar + final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::PChar ); + }; +inline_expr_interpret: + KW_Char + final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Char ); + }; +inline_expr_interpret: + KW_CurState + final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Curs ); + }; +inline_expr_interpret: + KW_TargState + final { + $$->inlineItem = new InlineItem( $1->loc, InlineItem::Targs ); + }; +inline_expr_interpret: + KW_Entry '(' state_ref ')' + final { + $$->inlineItem = new InlineItem( $1->loc, + new NameRef(nameRef), InlineItem::Entry ); + }; + +# A local state reference. Cannot have :: prefix. +local_state_ref: + no_name_sep state_ref_names; + +# Clear the name ref structure. +no_name_sep: + final { + nameRef.empty(); + }; + +# A qualified state reference. +state_ref: opt_name_sep state_ref_names; + +# Optional leading name separator. 
+opt_name_sep: + TK_NameSep + final { + /* Insert an initial null pointer val to indicate the existence of the + * initial name separator. */ + nameRef.setAs( 0 ); + }; +opt_name_sep: + final { + nameRef.empty(); + }; + +# List of names separated by :: +state_ref_names: + state_ref_names TK_NameSep TK_Word + final { + nameRef.append( $3->data ); + }; +state_ref_names: + TK_Word + final { + nameRef.append( $1->data ); + }; + +}%% + +%%{ + write types; + write data; +}%% + +void Parser6::init() +{ + %% write init; +} + +int Parser6::parseLangEl( int type, const Token *token ) +{ + %% write exec; + return errCount == 0 ? 0 : -1; +} + +void Parser6::clear() +{ + while ( block != 0 ) { + Parser6_Block *next = block->next; + free( block ); + block = next; + } + + clearTokdata( this ); +} + +void Parser6::tryMachineDef( const InputLoc &loc, char *name, + MachineDef *machineDef, bool isInstance ) +{ + GraphDictEl *newEl = pd->graphDict.insert( name ); + if ( newEl != 0 ) { + /* New element in the dict, all good. */ + newEl->value = new VarDef( name, machineDef ); + newEl->isInstance = isInstance; + newEl->loc = loc; + newEl->value->isExport = exportContext[exportContext.length()-1]; + + /* If it is an instance, put on the instance list. */ + if ( isInstance ) + pd->instanceList.append( newEl ); + } + else { + // Recover by ignoring the duplicate.
+ pd->id->error(loc) << "fsm \"" << name << "\" previously defined" << endl; + } +} + +ostream &Parser6::parse_error( int tokId, Token &token ) +{ + ostream &err = pd->id->error( token.loc ); + + err << "at token "; + if ( tokId < 128 ) + err << "\"" << Parser6_lelNames[tokId] << "\""; + else + err << Parser6_lelNames[tokId]; + if ( token.data != 0 ) + err << " with data \"" << token.data << "\""; + err << ": "; + + return err; +} + +int Parser6::token( InputLoc &loc, int tokId, char *tokstart, int toklen ) +{ + Token token; + token.data = tokstart; + token.length = toklen; + token.loc.fileName = loc.fileName; + token.loc.line = loc.line; + token.loc.col = loc.col; + int res = parseLangEl( tokId, &token ); + if ( res < 0 ) { + parse_error(tokId, token) << "parse error" << endl; + pd->id->abortCompile( 1 ); + } + return res; +} + +/* Send eof to all parsers. */ +void Parser6::terminateParser() +{ + /* FIXME: a proper token is needed here. Suppose we should use the + * location of EOF in the last file that the parser was referenced in. 
*/ + InputLoc loc; + loc.fileName = "<EOF>"; + loc.line = 0; + loc.col = 0; + + token( loc, Parser6_tk_eof, 0, 0 ); +} + + diff --git a/ragel/rlparse.lm b/ragel/rlparse.lm new file mode 100644 index 00000000..be6e1066 --- /dev/null +++ b/ragel/rlparse.lm @@ -0,0 +1,207 @@ +include 'ragel.lm' +include 'rlreduce.lm' + +namespace inline + lex + literal `fpc `fc `fcurs `ftargs + `fentry `fhold `fexec `fgoto `fnext + `fcall `fret `fbreak `fncall `fnret `fnbreak + + token ident /ident/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + token dec_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + literal + `{ `} `:: `* `, `( `) `; + + token var_ref + / "$" [a-zA-Z_][a-zA-Z_0-9]* / + { + if GblActionParams + { + input->push( make_token( + typeid<var_ref>, input->pull( match_length ) ) ) + } + else + { + # Just pull one char. Don't consume the word because it may + # be a keyword. 
+ input->push( make_token( + typeid<c_any>, input->pull( 1 ) ) ) + } + } + + token c_any + / any / + end + +end + + +namespace host + lex + literal `%%{ + + token close_inc /'}--%%'/ + { + input->push( make_token( typeid<close_inc>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token close_imp /'}++%%'/ + { + input->push( make_token( typeid<close_imp>, input->pull( match_length ) ) ) + restoreGlobals() + } + + token slr / '%%' [^{] [^\n]* '\n' / + { + # Translates single line to multi-line + input->pull( 2 ) + R: str = input->pull( match_length - 3 ) + input->push( "\n}%%" ) + input->push( R ) + input->push( "%%{" ) + } + + rl NL / '\n' / + + rl s_literal + / "'" ([^'\\\n] | '\\' (any | NL))* "'" / + + rl d_literal + / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / + + literal `define `= + + token ident /ident "'"?/ + token number /digit+/ + token hex_number /'0x' [0-9a-fA-F]+/ + + token comment + / c_comment | cpp_comment / + + token string + / s_literal | d_literal / + + token whitespace + / ( [ \t] | NL )+ / + + token c_any / any / + end + + def tok + [`define whitespace ident whitespace? number] :ImportDefNum + | [`define whitespace ident whitespace? string] :ImportDefStr + | [ident whitespace? `= whitespace? number] :ImportAssignNum + | [ident whitespace? `= whitespace? string] :ImportAssignStr + | [`define] :Def + | [`=] :Eq + | [ident] :Ident + | [number] :Number + | [hex_number] :HexNumber + | [comment] :Comment + | [string] :String + | [whitespace] :Whitespace + | [c_any] :Any +end + +reduction TopLevel + + # Pass Through. + # def tok + # [`define ident number] :Def1 + # | [`define ident string] :Def2 + # | [ident `= number] :Ass1 + # | [ident `= string] :Ass2 + # [`define whitespace ident whitespace? number] :ImportDefNum + # | [`define whitespace ident whitespace? string] :ImportDefStr + # | [ident whitespace? `= whitespace? number] :ImportAssignNum + # | [ident whitespace? `= whitespace? 
string] :ImportAssignStr + # | [`define] :Def + # | [`=] :Eq + # | [ident] :Ident + # | [number] :Number + # | [hex_number] :HexNumber + # | [comment] :Comment + # | [string] :String + # | [whitespace] :Whitespace + # | [c_any] :Any + + host::tok :ImportDefNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /* $number->neg */, $number->data, + $number->length, Literal::Number ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + + host::tok :ImportDefStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + + host::tok :ImportAssignNum + { + if ( isImport ) + { + Literal *lit = new Literal( @number, + false /*$number->neg */, $number->data, + $number->length, Literal::Number ); + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + + host::tok :ImportAssignStr + { + if ( isImport ) + { + Literal *lit = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + + string name( $ident->data, $ident->length ); + import( @ident, name, lit ); + } + } + +end + + +export RagelError: str + +# File name. The open is expected to succeed. It is tested before the colm +# program is called. +A: list_el<str> = argv->pop_head_el() +GblFileName = A->value + +# Remaining items are include paths. 
+while ( argv->length > 0 ) { + A = argv->pop_head_el() + GblIncludePaths->push_tail_el( A ) +} + +Stream: stream = open( GblFileName, "r" ) +reduce TopLevel start[ Stream ] +RagelError = error diff --git a/ragel/rlreduce.lm b/ragel/rlreduce.lm new file mode 100644 index 00000000..88b807f7 --- /dev/null +++ b/ragel/rlreduce.lm @@ -0,0 +1,2565 @@ +reduction TopLevel + # def machine_name + # [`machine word `;] :MachineName + ragel::machine_name :MachineName + { + string machine( $mn_word->data, $mn_word->length ); + + if ( includeDepth == 0 ) { + /* Maintain section dict, section list and the cur section pointer + * if we are in the top level. */ + SectionDictEl *sdEl = id->sectionDict.find( machine ); + if ( sdEl == 0 ) { + sdEl = new SectionDictEl( machine ); + sdEl->value = new Section( machine ); + id->sectionDict.insert( sdEl ); + id->sectionList.append( sdEl->value ); + } + + section = sdEl->value; + + ParseDataDictEl *pdEl = id->parseDataDict.find( machine ); + if ( pdEl == 0 ) { + InputLoc sectionLoc = @1; + pdEl = new ParseDataDictEl( machine ); + pdEl->value = new ParseData( id, machine, + id->nextMachineId++, sectionLoc, hostLang, + minimizeLevel, minimizeOpt ); + id->parseDataDict.insert( pdEl ); + id->parseDataList.append( pdEl->value ); + } + + pd = pdEl->value; + } + } + + # def statement + # [assignment] :Assignment + # | [instantiation] :Instantiation + # | [nfa_union] :NfaUnion + # | [action_spec] :ActionSpec + # | [`prepush action_block] :PrePush commit + # | [`postpop action_block] :PostPop commit + # | [`nfaprepush action_block] :NfaPrePush commit + # | [`nfapostpop action_block] :NfaPostPop commit + # | [`variable variable_name inline_expr_reparse] :Variable commit + # | [`alphtype alphtype_type `;] :AlphType commit + # | [`access inline_expr_reparse] :Access commit + # | [`write Cmd: word ArgList: write_arg* `;] :Write commit + # | [`getkey inline_expr_reparse] :GetKey commit + # | [`import string `;] :Import commit + # | [`include include_spec 
`;] :Include commit + + # def assignment + # [opt_export def_name `= join `;] :Assignment commit + ragel::assignment :Assignment + { + InputLoc loc = &$def_name->loc; + + bool exportMachine = $opt_export->isSet; + if ( exportMachine ) + exportContext.append( true ); + + string name( $def_name->tok.data, $def_name->tok.length ); + + /* Main machine must be an instance. */ + bool isInstance = false; + if ( name == MAIN_MACHINE ) { + pd->id->warning(loc) << "main machine will be implicitly instantiated" << endl; + isInstance = true; + } + + MachineDef *machineDef = new MachineDef( $join->join ); + + /* Generic creation of machine for instantiation and assignment. */ + tryMachineDef( loc, name, machineDef, isInstance ); + + if ( exportMachine ) + exportContext.remove( exportContext.length()-1 ); + + /* Pass a location to join_or_lm */ + if ( machineDef->join != 0 ) + machineDef->join->loc = loc; + } + + # def instantiation + # [opt_export def_name `:= lm `;] :Instantiation commit + ragel::instantiation :Instantiation + { + InputLoc loc = &$def_name->loc; + + bool exportMachine = $opt_export->isSet; + if ( exportMachine ) + exportContext.append( true ); + + string name( $def_name->tok.data, $def_name->tok.length ); + + MachineDef *machineDef = $lm->machineDef; + + /* Generic creation of machine for instantiation and assignment. */ + tryMachineDef( loc, name, machineDef, true ); + + if ( exportMachine ) + exportContext.remove( exportContext.length()-1 ); + + /* Pass a location to join_or_lm */ + if ( machineDef->join != 0 ) + machineDef->join->loc = loc; + } + + # def def_name + # [word] :Word + ragel::def_name + { + RedToken tok; + colm_location loc; + } + + ragel::def_name :Word + { + string data( $word->data, $word->length ); + $$->tok.set( $word, @word ); + $$->loc = *@1; + + /* Make/get the priority key. The name may have already been referenced + * and therefore exist. 
*/ + PriorDictEl *priorDictEl; + if ( pd->priorDict.insert( data, pd->fsmCtx->nextPriorKey, &priorDictEl ) ) + pd->fsmCtx->nextPriorKey += 1; + pd->curDefPriorKey = priorDictEl->value; + + /* Make/get the local error key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + pd->curDefLocalErrKey = localErrDictEl->value; + } + + # def nfa_union + # [def_name `|= nfa_rounds nfa_expr `;] :NfaUnion commit + ragel::nfa_union :NfaUnion + { + InputLoc loc = &$def_name->loc; + string name( $def_name->tok.data, $def_name->tok.length ); + + $nfa_expr->nfaUnion->roundsList = $nfa_rounds->roundsList; + + MachineDef *machineDef = new MachineDef( $nfa_expr->nfaUnion ); + + /* Generic creation of machine for instantiation and assignment. */ + tryMachineDef( loc, name, machineDef, true ); + } + + # def action_spec + # [`action word action_params action_block] :ActionSpecParams commit + # | [`action word action_block] :ActionSpec commit + ragel::action_spec + { + Action *action; + } + + ragel::action_spec :ActionSpecParams + { + string data( $word->data, $word->length ); + if ( pd->actionDict.find( data ) ) { + /* Recover by just ignoring the duplicate. */ + pd->id->error(@word) << "action \"" << data << "\" already defined" << endl; + } + else { + /* Add the action to the list of actions. */ + Action *newAction = new Action( &$action_block->loc, data, + $action_block->inlineList, pd->fsmCtx->nextCondId++ ); + + /* Insert to list and dict. */ + pd->fsmCtx->actionList.append( newAction ); + pd->actionDict.insert( newAction ); + + newAction->paramList = $action_params->paramList; + if ( $action_params->paramList != 0 ) + newAction->argListMap = new ActionArgListMap; + } + } + + ragel::action_spec :ActionSpec + { + string data( $word->data, $word->length ); + if ( pd->actionDict.find( data ) ) { + /* Recover by just ignoring the duplicate. 
*/ + pd->id->error(@word) << "action \"" << data << "\" already defined" << endl; + } + else { + /* Add the action to the list of actions. */ + Action *newAction = new Action( &$action_block->loc, data, + $action_block->inlineList, pd->fsmCtx->nextCondId++ ); + + /* Insert to list and dict. */ + pd->fsmCtx->actionList.append( newAction ); + pd->actionDict.insert( newAction ); + } + } + + # def statement + # | [`prepush action_block] :PrePush commit + # | [`postpop action_block] :PostPop commit + ragel::statement :PrePush + { + if ( pd->fsmCtx->prePushExpr != 0 ) { + /* Recover by just ignoring the duplicate. */ + pd->id->error(@1) << "prepush code already defined" << endl; + } + pd->fsmCtx->prePushExpr = new InlineBlock( @1, $action_block->inlineList ); + + } + ragel::statement :PostPop + { + if ( pd->fsmCtx->postPopExpr != 0 ) { + /* Recover by just ignoring the duplicate. */ + pd->id->error(@1) << "postpop code already defined" << endl; + } + pd->fsmCtx->postPopExpr = new InlineBlock( @1, $action_block->inlineList ); + } + + # def statement + # [`nfaprepush action_block] :NfaPrePush commit + ragel::statement :NfaPrePush + { + if ( pd->fsmCtx->nfaPrePushExpr != 0 ) { + /* Recover by just ignoring the duplicate. */ + pd->id->error(@1) << "nfa_pre_push code already defined" << endl; + } + + pd->fsmCtx->nfaPrePushExpr = new InlineBlock( @1, $action_block->inlineList ); + } + + # def statement + # [`nfapostpop action_block] :NfaPostPop commit + ragel::statement :NfaPostPop + { + if ( pd->fsmCtx->nfaPostPopExpr != 0 ) { + /* Recover by just ignoring the duplicate. 
*/ + pd->id->error(@1) << "nfa_post_pop code already defined" << endl; + } + + pd->fsmCtx->nfaPostPopExpr = new InlineBlock( @1, $action_block->inlineList ); + } + + # def statement + # | [`variable variable_name inline_expr_reparse] :Variable commit + # | [`access inline_expr_reparse] :Access commit + ragel::statement :Variable + { + string data( $variable_name->data, $variable_name->length ); + bool wasSet = pd->setVariable( data.c_str(), + $inline_expr_reparse->inlineList ); + if ( !wasSet ) + pd->id->error(@1) << "bad variable name: " << data << endl; + } + + ragel::statement :Access + { + pd->fsmCtx->accessExpr = $inline_expr_reparse->inlineList; + } + + # def statement + # | [`write Cmd: word ArgList: write_arg* `;] :Write commit + ragel::statement :Write + { + if ( !isImport && includeDepth == 0 ) { + { + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::Write; + inputItem->loc = @Cmd; + inputItem->name = section->sectionName; + inputItem->section = section; + + id->inputItems.append( inputItem ); + } + id->curItem = id->curItem->next; + InputItem *inputItem = id->curItem; + + string cmd( $Cmd->data, $Cmd->length ); + inputItem->writeArgs.push_back( cmd ); + inputItem->writeArgs.insert( inputItem->writeArgs.end(), writeArgs.begin(), writeArgs.end() ); + + inputItem->pd = pd; + } + + /* Clear the write args collector. */ + writeArgs.clear(); + } + + # def alphtype_type + # [W1: word] :One + # | [W1: word W2: word] :Two + ragel::alphtype_type :One + { + string one( $W1->data, $W1->length ); + if ( ! pd->setAlphType( @W1, hostLang, one.c_str() ) ) { + // Recover by ignoring the alphtype statement. + pd->id->error(@W1) << "\"" << one << + "\" is not a valid alphabet type" << endl; + } + } + + ragel::alphtype_type :Two + { + string one( $W1->data, $W1->length ); + string two( $W2->data, $W2->length ); + if ( ! pd->setAlphType( @W1, hostLang, one.c_str(), two.c_str() ) ) { + // Recover by ignoring the alphtype statement.
+ pd->id->error(@W1) << "\"" << one << + "\" is not a valid alphabet type" << endl; + } + } + + # def statement + # | [`getkey inline_expr_reparse] :GetKey commit + ragel::statement :GetKey + { + pd->fsmCtx->getKeyExpr = $inline_expr_reparse->inlineList; + } + + ragel::open_inc :OpenInc + { + includeDepth += 1; + } + + ragel::close_inc :CloseInc + { + includeDepth -= 1; + } + + ragel::open_imp :OpenImp + { + isImport = true; + } + + ragel::close_imp :CloseImp + { + isImport = false; + } + + # def join + # [join `, expression] :Rec + # | [expression] :Base + ragel::join + { + Join *join; + } + + ragel::join :Rec + { + $$->join = $_join->join; + $$->join->exprList.append( $expression->expr ); + } + + ragel::join :Base + { + $$->join = new Join( $expression->expr ); + } + + # def expression + # [expr_left expression_op_list] :Expression + ragel::expression + { + Expression *expr; + } + + ragel::expression :Expression + { + // 1. reverse the list + // 2. put the new term at the end. + Expression *prev = new Expression( $expr_left->term ); + Expression *cur = $expression_op_list->expr; + while ( cur != 0 ) { + Expression *next = cur->expression; + + /* Reverse. 
*/ + cur->expression = prev; + + prev = cur; + cur = next; + } + + $$->expr = prev; + } + + ragel::expr_left + { + Term *term; + } + + ragel::expr_left :Term + { + $$->term = $term->term; + } + + # def expression_op_list + # [expression_op expression_op_list] :Op + # | [] :Empty + ragel::expression_op_list + { + Expression *expr; + } + + ragel::expression_op_list :Op + { + $$->expr = new Expression( $_expression_op_list->expr, + $expression_op->term, $expression_op->type ); + } + + ragel::expression_op_list :Empty + { + $$->expr = 0; + } + + + # def expression_op + # [`| term] :Or + # | [`& term] :And + # | [`- term] :Sub + # | [`-- term] :Ssub + ragel::expression_op + { + Expression::Type type; + Term *term; + } + + ragel::expression_op :Or + { + $$->type = Expression::OrType; + $$->term = $term->term; + } + + ragel::expression_op :And + { + $$->type = Expression::IntersectType; + $$->term = $term->term; + } + + ragel::expression_op :Sub + { + $$->type = Expression::SubtractType; + $$->term = $term->term; + } + + ragel::expression_op :Ssub + { + $$->type = Expression::StrongSubtractType; + $$->term = $term->term; + } + + + # def term + # [term_left term_op_list_short] :Term + ragel::term + { + Term *term; + } + + ragel::term :Term + { + // 1. reverse the list + // 2. put the new term at the end. + Term *prev = new Term( $term_left->fwa ); + Term *cur = $term_op_list_short->term; + while ( cur != 0 ) { + Term *next = cur->term; + + /* Reverse. */ + cur->term = prev; + + prev = cur; + cur = next; + } + + $$->term = prev; + } + + # def term_left + # [factor_label] :FactorLabel + ragel::term_left + { + FactorWithAug *fwa; + } + + ragel::term_left :FactorLabel + { + $$->fwa = $factor_label->fwa; + } + + # # This list is done manually to get shortest match. 
+ # def term_op_list_short + # [] :Empty + # | [term_op term_op_list_short] :Terms + ragel::term_op_list_short + { + Term *term; + } + + ragel::term_op_list_short :Empty + { + $$->term = 0; + } + + ragel::term_op_list_short :Terms + { + $$->term = new Term( $_term_op_list_short->term, + $term_op->fwa, $term_op->type ); + } + + + # def term_op + # [factor_label] :None + # | [`. factor_label] :Dot + # | [`:> factor_label] :ColonLt + # | [`:>> factor_label] :ColonLtLt + # | [`<: factor_label] :GtColon + ragel::term_op + { + Term::Type type; + FactorWithAug *fwa; + } + + ragel::term_op :None + { + $$->type = Term::ConcatType; + $$->fwa = $factor_label->fwa; + } + + ragel::term_op :Dot + { + $$->type = Term::ConcatType; + $$->fwa = $factor_label->fwa; + } + + ragel::term_op :ColonLt + { + $$->type = Term::RightStartType; + $$->fwa = $factor_label->fwa; + } + + ragel::term_op :ColonLtLt + { + $$->type = Term::RightFinishType; + $$->fwa = $factor_label->fwa; + } + + ragel::term_op :GtColon + { + $$->type = Term::LeftType; + $$->fwa = $factor_label->fwa; + } + + # def factor_label + # [word `: factor_label] :Label + # | [factor_ep] :Ep + ragel::factor_label + { + FactorWithAug *fwa; + } + + ragel::factor_label :Label + { + $$->fwa = $_factor_label->fwa; + + InputLoc loc = @word; + string label( $word->data, $word->length ); + + $$->fwa->labels.insert( $$->fwa->labels.begin(), Label(loc, label) ); + + if ( pd->id->isBreadthLabel( label ) ) + $$->fwa->labels[0].cut = true; + } + + ragel::factor_label :Ep + { + $$->fwa = $factor_ep->fwa; + } + + # def factor_ep + # [factor_aug `-> epsilon_target] :Epsilon + # | [factor_aug] :Base + ragel::factor_ep + { + FactorWithAug *fwa; + } + + ragel::factor_ep :Epsilon + { + $$->fwa = $factor_aug->fwa; + $1->fwa->epsilonLinks.append( EpsilonLink( @2, $epsilon_target->nameRef ) ); + } + + ragel::factor_ep :Base + { + $$->fwa = $factor_aug->fwa; + } + + # def epsilon_target + # [epsilon_target `:: word] :Rec + # | [word] :Base + 
ragel::epsilon_target + { + NameRef *nameRef; + } + + ragel::epsilon_target :Rec + { + $$->nameRef = $_epsilon_target->nameRef; + $$->nameRef->append( string( $word->data, $word->length ) ); + } + + ragel::epsilon_target :Base + { + $$->nameRef = new NameRef; + $$->nameRef->append( string( $word->data, $word->length ) ); + } + + # def named_action_ref + # [word] :Plain + # | [word `( opt_action_arg_list `)] :Args + ragel::named_action_ref + { + Action *action; + } + + ragel::named_action_ref :Plain + { + /* Set the name in the actionDict. */ + string data( $word->data, $word->length ); + Action *action = pd->actionDict.find( data ); + if ( action != 0 ) { + if ( action->paramList != 0 ) + pd->id->error(@word) << "expecting no action args for " << data << endp; + + /* Pass up the action element */ + $$->action = action; + } + else { + /* Will recover by returning null as the action. */ + pd->id->error(@word) << "action lookup of \"" << data << "\" failed" << endl; + $$->action = 0; + } + } + + ragel::named_action_ref :Args + { + /* Set the name in the actionDict. */ + string data( $word->data, $word->length ); + Action *action = pd->actionDict.find( data ); + if ( action != 0 ) { + if ( action->paramList == 0 ) + pd->id->error(@word) << "expecting action args" << endp; + + /* Pass up the action element */ + $$->action = action; + } + else { + /* Will recover by returning null as the action. */ + pd->id->error(@word) << "action lookup of \"" << data << "\" failed" << endl; + $$->action = 0; + } + + if ( $$->action != 0 ) { + ActionArgList *argList = $opt_action_arg_list->argList; + ActionParamList *paramList = action->paramList; + + /* Make sure the number of actions line up. */ + if ( argList->length() != paramList->length() ) { + pd->id->error(@1) << "wrong number of action " + "arguments for \"" << data << "\"" << endl; + } + + /* Now we need to specialize using the supplied args. We can only + * present an Action* to fsmcodegen. 
*/ + ActionArgListMapEl *el = action->argListMap->find( argList ); + if ( el == 0 ) { + /* Allocate an action representing this specialization. */ + Action *specAction = Action::cons( @1, action, + argList, pd->fsmCtx->nextCondId++ ); + pd->fsmCtx->actionList.append( specAction ); + + el = action->argListMap->insert( argList, specAction ); + } + else { + /* Can delete $3->arg list. */ + delete $opt_action_arg_list->argList; + } + + $$->action = el->value; + } + } + + # def action_arg_list + # [action_arg_list `, action_ref] :Rec + # | [action_ref] :Base + ragel::action_arg_list + { + ActionArgList *argList; + } + + ragel::action_arg_list :Rec + { + $$->argList = $_action_arg_list->argList; + $$->argList->append( $action_ref->action ); + } + + ragel::action_arg_list :Base + { + $$->argList = new ActionArgList; + $$->argList->append( $action_ref->action ); + } + + # def opt_action_arg_list + # [action_arg_list] :List + # | [] :Empty + ragel::opt_action_arg_list + { + ActionArgList *argList; + } + + ragel::opt_action_arg_list :List + { + $$->argList = $action_arg_list->argList; + } + + ragel::opt_action_arg_list :Empty + { + $$->argList = new ActionArgList; + } + + # def action_ref + # [named_action_ref] :NamedRef + # | [`( named_action_ref `)] :ParenNamed + # | [action_block] :Block + ragel::action_ref + { + Action *action; + } + + ragel::action_ref :NamedRef + { + $$->action = $named_action_ref->action; + } + + ragel::action_ref :ParenNamed + { + $$->action = $named_action_ref->action; + } + + ragel::action_ref :Block + { + /* Create the action, add it to the list and pass up. 
*/ + Action *newAction = new Action( &$action_block->loc, std::string(), + $action_block->inlineList, pd->fsmCtx->nextCondId++ ); + pd->fsmCtx->actionList.append( newAction ); + $$->action = newAction; + } + + # def action_params + # [`( opt_action_param_list `)] + ragel::action_params + { + ActionParamList *paramList; + } + + ragel::action_params :List + { + $$->paramList = $opt_action_param_list->paramList; + paramList = $2->paramList; + } + + # def opt_action_param_list + # [action_param_list] :List + # | [] :Empty + ragel::opt_action_param_list + { + ActionParamList *paramList; + } + + ragel::opt_action_param_list :List + { + $$->paramList = $action_param_list->paramList; + } + + ragel::opt_action_param_list :Empty + { + $$->paramList = new ActionParamList; + } + + # def action_param + # [word] + ragel::action_param + { + ActionParam *param; + } + + ragel::action_param :Word + { + string param( $word->data, $word->length ); + $$->param = new ActionParam( param ); + } + + # def action_param_list + # [action_param_list `, action_param] + # | [action_param] + ragel::action_param_list + { + ActionParamList *paramList; + } + + ragel::action_param_list :Rec + { + $$->paramList = $_action_param_list->paramList; + $$->paramList->append( $action_param->param ); + } + + ragel::action_param_list :Base + { + $$->paramList = new ActionParamList; + $$->paramList->append( $action_param->param ); + } + + + # def action_block + # [`{ c_select CInlineBlock: inline::inline_block `}] :C + # | [`{ ruby_select RubyInlineBlock: ruby_inline::inline_block ruby_inline::`}] + # | [`{ ocaml_select OCamlInlineBlock: ocaml_inline::inline_block ocaml_inline::`}] + # | [`{ crack_select OCamlInlineBlock: crack_inline::inline_block crack_inline::`}] + ragel::action_block + { + colm_location loc; + InlineList *inlineList; + } + + ragel::action_block :ActionBlock + { + $$->loc = *@1; + $$->inlineList = $CInlineBlock->inlineList; + } + + # def inline_expr_reparse + # [_inline_expr_reparse] 
:Reparse + # | [action_expr] :ActionExpr + ragel::inline_expr_reparse + { + InlineList *inlineList; + } + ragel::inline_expr_reparse :ActionExpr + { + $$->inlineList = $action_expr->inlineList; + } + + # def action_expr + # [`{ c_select CInlineExpr: inline::inline_expr `}] :C + # | [`{ ruby_select RubyInlineExpr: ruby_inline::inline_expr ruby_inline::`}] + # | [`{ ocaml_select OCamlInlineExpr: ocaml_inline::inline_expr ocaml_inline::`}] + # | [`{ crack_select CrackInlineExpr: crack_inline::inline_expr crack_inline::`}] + ragel::action_expr + { + colm_location loc; + InlineList *inlineList; + } + + ragel::action_expr :ActionExpr + { + $$->loc = *@1; + $$->inlineList = $CInlineExpr->inlineList; + } + + # def state_ref + # [opt_name_sep state_ref_names] :Ref + state_ref::state_ref + { + NameRef *nameRef; + } + + state_ref::state_ref :Ref + { + $$->nameRef = $state_ref_names->nameRef; + if ( $opt_name_sep->nameSep ) + $$->nameRef->prepend( "" ); + } + + # def opt_name_sep + # [srlex::`::] :ColonColon + # | [] :Empty + state_ref::opt_name_sep + { + bool nameSep; + } + + state_ref::opt_name_sep :ColonColon + { + $$->nameSep = true; + } + + state_ref::opt_name_sep :Empty + { + $$->nameSep = false; + } + + # def state_ref_names + # [state_ref_names srlex::`:: srlex::word] :Rec + # | [srlex::word] :Base + state_ref::state_ref_names + { + NameRef *nameRef; + } + + state_ref::state_ref_names :Rec + { + $$->nameRef = $_state_ref_names->nameRef; + $$->nameRef->append( string( $word->data, $word->length ) ); + } + + state_ref::state_ref_names :Base + { + $$->nameRef = new NameRef; + $$->nameRef->append( string( $word->data, $word->length ) ); + } + + # def priority_aug + # [uint] :NoSign + # | [`+ uint] :Plus + # | [`- uint] :Minus + ragel::priority_aug + { + int priorityNum; + } + ragel::priority_aug :NoSign + { + string data( $uint->data, $uint->length ); + $$->priorityNum = tryLongScan( @1, data.c_str() ); + } + ragel::priority_aug :Plus + { + string data( $uint->data, 
$uint->length ); + $$->priorityNum = tryLongScan( @1, data.c_str() ); + } + ragel::priority_aug :Minus + { + string data( $uint->data, $uint->length ); + $$->priorityNum = -1 * tryLongScan( @1, data.c_str() ); + } + + #def priority_name + # [word] :Word + + ragel::priority_name + { + int priorityName; + } + + ragel::priority_name :Word + { + string data( $word->data, $word->length ); + + // Lookup/create the priority key. + PriorDictEl *priorDictEl; + if ( pd->priorDict.insert( data, pd->fsmCtx->nextPriorKey, &priorDictEl ) ) + pd->fsmCtx->nextPriorKey += 1; + + // Use the inserted/found priority key. + $$->priorityName = priorDictEl->value; + } + + # def error_name + # [word] :Word + ragel::error_name + { + int errName; + } + + ragel::error_name :Word + { + string data( $word->data, $word->length ); + /* Lookup/create the local error key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + + /* Use the inserted/found local error key. */ + $$->errName = localErrDictEl->value; + } + + # def aug_base + # [`@] :Finish | [`>] :Enter | [`%] :Leave | [`$] :All + + ragel::aug_base + { + colm_location loc; + AugType augType; + } + + ragel::aug_base :Enter + { $$->loc = *@1; $$->augType = at_start; } + ragel::aug_base :All + { $$->loc = *@1; $$->augType = at_all; } + ragel::aug_base :Finish + { $$->loc = *@1; $$->augType = at_finish; } + ragel::aug_base :Leave + { $$->loc = *@1; $$->augType = at_leave; } + + # def aug_cond + # [`>?] :Start1 | [`$?] :All1 | [`%?]
:Leave1 + # | [`> `when] :Start2 | [`$ `when] :All2 | [`% `when] :Leave2 + # | [`inwhen] :Start3 | [`when] :All3 | [`outwhen] :Leave3 + + ragel::aug_cond + { + colm_location loc; + AugType augType; + } + + ragel::aug_cond :Start1 + { $$->loc = *@1; $$->augType = at_start; } + ragel::aug_cond :Start2 + { $$->loc = *@1; $$->augType = at_start; } + ragel::aug_cond :Start3 + { $$->loc = *@1; $$->augType = at_start; } + ragel::aug_cond :All1 + { $$->loc = *@1; $$->augType = at_all; } + ragel::aug_cond :All2 + { $$->loc = *@1; $$->augType = at_all; } + ragel::aug_cond :All3 + { $$->loc = *@1; $$->augType = at_all; } + ragel::aug_cond :Leave1 + { $$->loc = *@1; $$->augType = at_leave; } + ragel::aug_cond :Leave2 + { $$->loc = *@1; $$->augType = at_leave; } + ragel::aug_cond :Leave3 + { $$->loc = *@1; $$->augType = at_leave; } + + # def aug_to_state + # [`>~] :Start1 | [`<~] :NotStart1 | [`$~] :All1 + # | [`%~] :Final1 | [`@~] :NotFinal1 | [`<>~] :Middle1 + # | [`> `to] :Start2 | [`< `to] :NotStart2 | [`$ `to] :All2 + # | [`% `to] :Final2 | [`@ `to] :NotFinal2 | [`<> `to] :Middle2 + + ragel::aug_to_state + { + colm_location loc; + AugType augType; + } + + ragel::aug_to_state :Start1 + { $$->loc = *@1; $$->augType = at_start_to_state; } + ragel::aug_to_state :Start2 + { $$->loc = *@1; $$->augType = at_start_to_state; } + ragel::aug_to_state :NotStart1 + { $$->loc = *@1; $$->augType = at_not_start_to_state; } + ragel::aug_to_state :NotStart2 + { $$->loc = *@1; $$->augType = at_not_start_to_state; } + ragel::aug_to_state :All1 + { $$->loc = *@1; $$->augType = at_all_to_state; } + ragel::aug_to_state :All2 + { $$->loc = *@1; $$->augType = at_all_to_state; } + ragel::aug_to_state :Final1 + { $$->loc = *@1; $$->augType = at_final_to_state; } + ragel::aug_to_state :Final2 + { $$->loc = *@1; $$->augType = at_final_to_state; } + ragel::aug_to_state :NotFinal1 + { $$->loc = *@1; $$->augType = at_not_final_to_state; } + ragel::aug_to_state :NotFinal2 + { $$->loc = *@1; $$->augType = 
at_not_final_to_state; } + ragel::aug_to_state :Middle1 + { $$->loc = *@1; $$->augType = at_middle_to_state; } + ragel::aug_to_state :Middle2 + { $$->loc = *@1; $$->augType = at_middle_to_state; } + + # def aug_from_state + # [`>*] :Start1 | [`<*] :NotStart1 | [`$*] :All1 + # | [`%*] :Final1 | [`@*] :NotFinal1 | [`<>*] :Middle1 + # | [`> `from] :Start2 | [`< `from] :NotStart2 | [`$ `from] :All2 + # | [`% `from] :Final2 | [`@ `from] :NotFinal2 | [`<> `from] :Middle2 + + ragel::aug_from_state + { + colm_location loc; + AugType augType; + } + + ragel::aug_from_state :Start1 + { $$->loc = *@1; $$->augType = at_start_from_state; } + ragel::aug_from_state :Start2 + { $$->loc = *@1; $$->augType = at_start_from_state; } + ragel::aug_from_state :NotStart1 + { $$->loc = *@1; $$->augType = at_not_start_from_state; } + ragel::aug_from_state :NotStart2 + { $$->loc = *@1; $$->augType = at_not_start_from_state; } + ragel::aug_from_state :All1 + { $$->loc = *@1; $$->augType = at_all_from_state; } + ragel::aug_from_state :All2 + { $$->loc = *@1; $$->augType = at_all_from_state; } + ragel::aug_from_state :Final1 + { $$->loc = *@1; $$->augType = at_final_from_state; } + ragel::aug_from_state :Final2 + { $$->loc = *@1; $$->augType = at_final_from_state; } + ragel::aug_from_state :NotFinal1 + { $$->loc = *@1; $$->augType = at_not_final_from_state; } + ragel::aug_from_state :NotFinal2 + { $$->loc = *@1; $$->augType = at_not_final_from_state; } + ragel::aug_from_state :Middle1 + { $$->loc = *@1; $$->augType = at_middle_from_state; } + ragel::aug_from_state :Middle2 + { $$->loc = *@1; $$->augType = at_middle_from_state; } + + # def aug_eof + # [`>/] :Start1 | [`</] :NotStart1 | [`$/] :All1 + # | [`%/] :Final1 | [`@/] :NotFinal1 | [`<>/] :Middle1 + # | [`> `eof] :Start2 | [`< `eof] :NotStart2 | [`$ `eof] :All2 + # | [`% `eof] :Final2 | [`@ `eof] :NotFinal2 | [`<> `eof] :Middle2 + + ragel::aug_eof + { + colm_location loc; + AugType augType; + } + + ragel::aug_eof :Start1 + { $$->loc = *@1; 
$$->augType = at_start_eof; } + ragel::aug_eof :Start2 + { $$->loc = *@1; $$->augType = at_start_eof; } + ragel::aug_eof :NotStart1 + { $$->loc = *@1; $$->augType = at_not_start_eof; } + ragel::aug_eof :NotStart2 + { $$->loc = *@1; $$->augType = at_not_start_eof; } + ragel::aug_eof :All1 + { $$->loc = *@1; $$->augType = at_all_eof; } + ragel::aug_eof :All2 + { $$->loc = *@1; $$->augType = at_all_eof; } + ragel::aug_eof :Final1 + { $$->loc = *@1; $$->augType = at_final_eof; } + ragel::aug_eof :Final2 + { $$->loc = *@1; $$->augType = at_final_eof; } + ragel::aug_eof :NotFinal1 + { $$->loc = *@1; $$->augType = at_not_final_eof; } + ragel::aug_eof :NotFinal2 + { $$->loc = *@1; $$->augType = at_not_final_eof; } + ragel::aug_eof :Middle1 + { $$->loc = *@1; $$->augType = at_middle_eof; } + ragel::aug_eof :Middle2 + { $$->loc = *@1; $$->augType = at_middle_eof; } + + # def aug_gbl_error + # [`>!] :Start1 | [`<!] :NotStart1 | [`$!] :All1 + # | [`%!] :Final1 | [`@!] :NotFinal1 | [`<>!] :Middle1 + # | [`> `err] :Start2 | [`< `err] :NotStart2 | [`$ `err] :All2 + # | [`% `err] :Final2 | [`@ `err] :NotFinal2 | [`<> `err] :Middle2 + + ragel::aug_gbl_error + { + colm_location loc; + AugType augType; + } + + ragel::aug_gbl_error :Start1 + { $$->loc = *@1; $$->augType = at_start_gbl_error; } + ragel::aug_gbl_error :Start2 + { $$->loc = *@1; $$->augType = at_start_gbl_error; } + ragel::aug_gbl_error :NotStart1 + { $$->loc = *@1; $$->augType = at_not_start_gbl_error; } + ragel::aug_gbl_error :NotStart2 + { $$->loc = *@1; $$->augType = at_not_start_gbl_error; } + ragel::aug_gbl_error :NotStart2 + { $$->loc = *@1; $$->augType = at_not_start_gbl_error; } + ragel::aug_gbl_error :All1 + { $$->loc = *@1; $$->augType = at_all_gbl_error; } + ragel::aug_gbl_error :All2 + { $$->loc = *@1; $$->augType = at_all_gbl_error; } + ragel::aug_gbl_error :Final1 + { $$->loc = *@1; $$->augType = at_final_gbl_error; } + ragel::aug_gbl_error :Final2 + { $$->loc = *@1; $$->augType = at_final_gbl_error; } + 
ragel::aug_gbl_error :NotFinal1 + { $$->loc = *@1; $$->augType = at_not_final_gbl_error; } + ragel::aug_gbl_error :NotFinal2 + { $$->loc = *@1; $$->augType = at_not_final_gbl_error; } + ragel::aug_gbl_error :Middle1 + { $$->loc = *@1; $$->augType = at_middle_gbl_error; } + ragel::aug_gbl_error :Middle2 + { $$->loc = *@1; $$->augType = at_middle_gbl_error; } + + # def aug_local_error + # [`>^] :Start1 | [`<^] :NotStart1 | [`$^] :All1 + # | [`%^] :Final1 | [`@^] :NotFinal1 | [`<>^] :Middle1 + # | [`> `lerr] :Start2 | [`< `lerr] :NotStart2 | [`$ `lerr] :All2 + # | [`% `lerr] :Final2 | [`@ `lerr] :NotFinal2 | [`<> `lerr] :Middle2 + + ragel::aug_local_error + { + colm_location loc; + AugType augType; + } + + ragel::aug_local_error :Start1 + { $$->loc = *@1; $$->augType = at_start_local_error; } + + ragel::aug_local_error :Start2 + { $$->loc = *@1; $$->augType = at_start_local_error; } + + ragel::aug_local_error :NotStart1 + { $$->loc = *@1; $$->augType = at_not_start_local_error; } + + ragel::aug_local_error :NotStart2 + { $$->loc = *@1; $$->augType = at_not_start_local_error; } + + ragel::aug_local_error :All1 + { $$->loc = *@1; $$->augType = at_all_local_error; } + + ragel::aug_local_error :All2 + { $$->loc = *@1; $$->augType = at_all_local_error; } + + ragel::aug_local_error :Final1 + { $$->loc = *@1; $$->augType = at_final_local_error; } + + ragel::aug_local_error :Final2 + { $$->loc = *@1; $$->augType = at_final_local_error; } + + ragel::aug_local_error :NotFinal1 + { $$->loc = *@1; $$->augType = at_not_final_local_error; } + + ragel::aug_local_error :NotFinal2 + { $$->loc = *@1; $$->augType = at_not_final_local_error; } + + ragel::aug_local_error :Middle1 + { $$->loc = *@1; $$->augType = at_middle_local_error; } + + ragel::aug_local_error :Middle2 + { $$->loc = *@1; $$->augType = at_middle_local_error; } + + # def factor_aug + # [factor_aug aug_base action_ref] :ActionRef + # | [factor_aug aug_base priority_aug] :PriorEmbed + # | [factor_aug aug_base `( 
priority_name `, priority_aug `)] :NamedPriorEmbed + # | [factor_aug aug_cond action_ref] :CondEmbed + # | [factor_aug aug_cond `! action_ref] :NegCondEmbed + # | [factor_aug aug_to_state action_ref] :ToStateAction + # | [factor_aug aug_from_state action_ref] :FromStateAction + # | [factor_aug aug_eof action_ref] :EofAction + # | [factor_aug aug_gbl_error action_ref] :GblErrorAction + # | [factor_aug aug_local_error action_ref] :LocalErrorDef + # | [factor_aug aug_local_error `( error_name `, action_ref `)] :LocalErrorName + # | [factor_rep] :Base + ragel::factor_aug + { + FactorWithAug *fwa; + } + + ragel::factor_aug :ActionRef + { + $$->fwa = $_factor_aug->fwa; + + /* Append the action to the factorWithAug, record the reference from + * factorWithAug to the action and pass up the factorWithAug. */ + $$->fwa->actions.append( ParserAction( + &$aug_base->loc, $aug_base->augType, 0, $action_ref->action ) ); + } + + ragel::factor_aug :PriorEmbed + { + $$->fwa = $_factor_aug->fwa; + + $1->fwa->priorityAugs.append( PriorityAug( $aug_base->augType, + pd->curDefPriorKey, $priority_aug->priorityNum ) ); + } + + ragel::factor_aug :NamedPriorEmbed + { + $$->fwa = $_factor_aug->fwa; + + $1->fwa->priorityAugs.append( PriorityAug( $aug_base->augType, + $priority_name->priorityName, $priority_aug->priorityNum ) ); + } + + ragel::factor_aug :CondEmbed + { + $$->fwa = $_factor_aug->fwa; + + $$->fwa->conditions.append( ConditionTest( &$aug_cond->loc, + $aug_cond->augType, $action_ref->action, true ) ); + } + + ragel::factor_aug :NegCondEmbed + { + $$->fwa = $_factor_aug->fwa; + + $$->fwa->conditions.append( ConditionTest( &$aug_cond->loc, + $aug_cond->augType, $action_ref->action, false ) ); + } + + ragel::factor_aug :ToStateAction + { + $$->fwa = $_factor_aug->fwa; + + $$->fwa->actions.append( ParserAction( &$aug_to_state->loc, + $aug_to_state->augType, 0, $action_ref->action ) ); + } + + ragel::factor_aug :FromStateAction + { + $$->fwa = $_factor_aug->fwa; + + $$->fwa->actions.append(
ParserAction( &$aug_from_state->loc, + $aug_from_state->augType, 0, $action_ref->action ) ); + } + + ragel::factor_aug :EofAction + { + $$->fwa = $_factor_aug->fwa; + $1->fwa->actions.append( ParserAction( &$aug_eof->loc, + $aug_eof->augType, 0, $action_ref->action ) ); + } + + ragel::factor_aug :GblErrorAction + { + $$->fwa = $_factor_aug->fwa; + + $1->fwa->actions.append( ParserAction( &$aug_gbl_error->loc, + $aug_gbl_error->augType, pd->curDefLocalErrKey, $action_ref->action ) ); + } + + ragel::factor_aug :LocalErrorDef + { + $$->fwa = $_factor_aug->fwa; + + $$->fwa->actions.append( ParserAction( &$aug_local_error->loc, + $aug_local_error->augType, pd->curDefLocalErrKey, $action_ref->action ) ); + } + + ragel::factor_aug :LocalErrorName + { + $$->fwa = $_factor_aug->fwa; + + $$->fwa->actions.append( ParserAction( &$aug_local_error->loc, + $aug_local_error->augType, $error_name->errName, $action_ref->action ) ); + } + + ragel::factor_aug :Base + { + $$->fwa = new FactorWithAug( $factor_rep->rep ); + } + + #def factor_rep + # [factor_neg factor_rep_op_list] :Op + + ragel::factor_rep + { + FactorWithRep *rep; + } + + ragel::factor_rep :Op + { + FactorWithRep *prev = new FactorWithRep( $factor_neg->neg ); + FactorWithRep *cur = $factor_rep_op_list->rep; + while ( cur != 0 ) { + FactorWithRep *next = cur->factorWithRep; + + /* Reverse. */ + cur->factorWithRep = prev; + + prev = cur; + cur = next; + } + + $$->rep = prev; + } + + + # def factor_rep_op_list + # [factor_rep_op factor_rep_op_list] + # | [] + ragel::factor_rep_op_list + { + FactorWithRep *rep; + } + ragel::factor_rep_op_list :Rec + { + $$->rep = $factor_rep_op->rep; + $$->rep->factorWithRep = $_factor_rep_op_list->rep; + } + ragel::factor_rep_op_list :Base + { + $$->rep = 0; + } + + # def factor_rep_op + # [`*] :Star + # | [`**] :StarStar + # | [`?] 
:Optional + # | [`+] :Plus + # | [`{ factor_rep_num `}] :ExactRep + # | [`{ `, factor_rep_num `}] :MaxRep + # | [`{ factor_rep_num `, `}] :MinRep + # | [`{ LowRep: factor_rep_num `, HighRep: factor_rep_num `}] :RangeRep + ragel::factor_rep_op + { + FactorWithRep *rep; + } + ragel::factor_rep_op :Star + { + $$->rep = new FactorWithRep( @1, 0, 0, 0, FactorWithRep::StarType ); + } + ragel::factor_rep_op :StarStar + { + $$->rep = new FactorWithRep( @1, 0, 0, 0, FactorWithRep::StarStarType ); + } + ragel::factor_rep_op :Optional + { + $$->rep = new FactorWithRep( @1, 0, 0, 0, FactorWithRep::OptionalType ); + } + ragel::factor_rep_op :Plus + { + $$->rep = new FactorWithRep( @1, 0, 0, 0, FactorWithRep::PlusType ); + } + ragel::factor_rep_op :ExactRep + { + $$->rep = new FactorWithRep( @1, 0, + $factor_rep_num->rep, 0, + FactorWithRep::ExactType ); + } + ragel::factor_rep_op :MaxRep + { + $$->rep = new FactorWithRep( @1, 0, + 0, $factor_rep_num->rep, + FactorWithRep::MaxType ); + } + ragel::factor_rep_op :MinRep + { + $$->rep = new FactorWithRep( @1, 0, + $factor_rep_num->rep, 0, + FactorWithRep::MinType ); + } + ragel::factor_rep_op :RangeRep + { + $$->rep = new FactorWithRep( @1, 0, + $LowRep->rep, $HighRep->rep, + FactorWithRep::RangeType ); + } + + # def factor_rep_num + # [uint] + ragel::factor_rep_num + { + int rep; + } + + ragel::factor_rep_num :RepNum + { + // Convert the repetition count to a long. Check for overflow, both of + // strtol itself and of the int field the value is stored in. + string data( $uint->data, $uint->length ); + errno = 0; + long rep = strtol( data.c_str(), 0, 10 ); + if ( ( errno == ERANGE && rep == LONG_MAX ) || rep > INT_MAX ) { + // Repetition too large. Recover by returning repetition 1. + pd->id->error(@uint) << "repetition number " << data << " overflows" << endl; + $$->rep = 1; + } + else { + // The text is an unsigned integer and rep <= INT_MAX was checked + // above, so narrowing to int cannot overflow. + $$->rep = rep; + } + } + + + # def factor_neg + # [`! 
factor_neg] :Bang + # | [`^ factor_neg] :Caret + # | [factor] :Base + ragel::factor_neg + { + FactorWithNeg *neg; + } + + ragel::factor_neg :Bang + { + $$->neg = new FactorWithNeg( @1, + $_factor_neg->neg, FactorWithNeg::NegateType ); + } + + ragel::factor_neg :Caret + { + $$->neg = new FactorWithNeg( @1, + $_factor_neg->neg, FactorWithNeg::CharNegateType ); + } + + ragel::factor_neg :Base + { + $$->neg = new FactorWithNeg( $factor->factor ); + } + + # def opt_max_arg + # [`, action_ref] + ragel::opt_max_arg + { + Action *action; + } + + ragel::opt_max_arg :Action + { + $$->action = $action_ref->action; + } + + ragel::opt_max_arg :Empty + { + $$->action = 0; + } + + # + # :nfa + # + ragel::nfastar + { + Factor::NfaRepeatMode mode; + } + + ragel::nfastar :Default + { + $$->mode = Factor::NfaLegacy; + } + + ragel::nfastar :Lazy + { + $$->mode = Factor::NfaLazy; + } + + ragel::nfastar :Greedy + { + $$->mode = Factor::NfaGreedy; + } + + # + # :nfa_wrap + # + ragel::nfawrap + { + Factor::NfaRepeatMode mode; + } + + ragel::nfawrap :Default + { + $$->mode = Factor::NfaGreedy; + } + + ragel::nfawrap :Lazy + { + $$->mode = Factor::NfaLazy; + } + + ragel::nfawrap :Greedy + { + $$->mode = Factor::NfaGreedy; + } + + # + # :cond + # + ragel::colon_cond + { + Factor::Type type; + } + + ragel::colon_cond :Cond + { + $$->type = Factor::CondStar; + } + + ragel::colon_cond :CondStar + { + $$->type = Factor::CondStar; + } + + ragel::colon_cond :CondPlus + { + $$->type = Factor::CondPlus; + } + + + # def factor + # [alphabet_num] :AlphabetNum + # | [word] :Word + # | [string] :String + # | [lex_sqopen_pos reg_or_data re_or_sqclose] :PosOrBlock + # | [lex_sqopen_neg reg_or_data re_or_sqclose] :NegOrBlock + # | [lex_regex_open regex re_close] :Regex + # | [RL1: range_lit `.. 
RL2: range_lit] :Range + # | [RL1: range_lit `../i RL2: range_lit] :RangeIndep + # | [`:nfa `( uint `, expression `, + # Push: action_ref `, Pop: action_ref `, Init: action_ref `, Stay: action_ref `, + # Repeat: action_ref `, Exit: action_ref `):] :Nfa + # | [`:cond `( uint `, expression `, + # Init: action_ref `, Inc: action_ref `, Min: action_ref OptMax: opt_max_arg `):] :Cond + # | [`( join `)] :Join + ragel::factor + { + Factor *factor; + } + + ragel::factor :Join + { + /* Create a new factor going to a parenthesized join. */ + $$->factor = new Factor( $join->join ); + $$->factor->join->loc = @1; + } + + ragel::factor :AlphabetNum + { + $$->factor = new Factor( new Literal( $alphabet_num->tok.loc, + $alphabet_num->neg, $alphabet_num->tok.data, + $alphabet_num->tok.length, Literal::Number ) ); + } + + ragel::factor :Word + { + InputLoc loc = @word; + string s( $word->data, $word->length ); + + /* Find the named graph. */ + GraphDictEl *gdNode = pd->graphDict.find( s ); + if ( gdNode == 0 ) { + /* Recover by returning null as the factor node. */ + pd->id->error(loc) << "graph lookup of \"" << s << "\" failed" << endl; + $$->factor = 0; + } + else if ( gdNode->isInstance ) { + /* Recover by retuning null as the factor node. */ + pd->id->error(loc) << "references to graph instantiations not allowed " + "in expressions" << endl; + $$->factor = 0; + } + else { + /* Create a factor node that is a lookup of an expression. 
*/ + $$->factor = new Factor( loc, gdNode->value ); + } + } + + ragel::factor :String + { + $$->factor = new Factor( new Literal( @string, false, + $string->data, $string->length, Literal::LitString ) ); + } + + #ragel::factor :HexString + #{ + # $$->factor = new Factor( new Literal( @hex_string, false, + # $hex_string->data, $hex_string->length, Literal::HexString ) ); + #} + + ragel::factor :Range + { + $$->factor = new Factor( new Range( $RL1->literal, $RL2->literal, false ) ); + } + + ragel::factor :RangeIndep + { + $$->factor = new Factor( new Range( $RL1->literal, $RL2->literal, true ) ); + } + + # | [lex_sqopen_pos reg_or_data re_or_sqclose] :PosOrBlock + ragel::factor :PosOrBlock + { + $$->factor = new Factor( new ReItem( @lex_sqopen_pos, + $reg_or_data->reOrBlock, ReItem::OrBlock ) ); + } + + ragel::factor :NegOrBlock + { + $$->factor = new Factor( new ReItem( @lex_sqopen_neg, + $reg_or_data->reOrBlock, ReItem::NegOrBlock ) ); + } + + ragel::factor :Nfa + { + /* push, pop, init, stay, repeat, exit */ + $$->factor = new Factor( @1, pd->nextRepId++, $expression->expr, + $Push->action, $Pop->action, $Init->action, $Stay->action, + $Repeat->action, $Exit->action, Factor::NfaRep ); + $$->factor->mode = $1->mode; + } + + ragel::factor :NfaWrap + { + /* push, pop, init, stay, repeat, exit */ + $$->factor = new Factor( @1, pd->nextRepId++, $expression->expr, + $Push->action, $Pop->action, $Init->action, $Stay->action, + 0, $Exit->action, Factor::NfaWrap ); + $$->factor->mode = $1->mode; + } + + ragel::factor :Cond + { + /* init, inc, min, opt-max. 
*/ + $$->factor = new Factor( @2, pd->nextRepId++, $expression->expr, + $Init->action, $Inc->action, $Min->action, $OptMax->action, 0, 0, $1->type ); + } + + ragel::factor :Regex + { + bool caseInsensitive = false; + checkLitOptions( pd->id, @re_close, $re_close->data, $re_close->length, caseInsensitive ); + if ( caseInsensitive ) + $2->regExpr->caseInsensitive = true; + $$->factor = new Factor( $regex->regExpr ); + } + + # def regex + # [reg_item_rep_list] :List + ragel::regex + { + RegExpr *regExpr; + } + + ragel::regex :List + { + $$->regExpr = $reg_item_rep_list->regExpr; + } + + # def reg_item_rep_list + # [reg_item_rep_list reg_item_rep] :Rec + # | [] :Base + ragel::reg_item_rep_list + { + RegExpr *regExpr; + } + ragel::reg_item_rep_list :Rec + { + $$->regExpr = new RegExpr( $_reg_item_rep_list->regExpr, + $reg_item_rep->reItem ); + } + ragel::reg_item_rep_list :Base + { + $$->regExpr = new RegExpr(); + } + + # def reg_item_rep + # [reg_item re_star] :Star + # | [reg_item] :Base + ragel::reg_item_rep + { + ReItem *reItem; + } + + ragel::reg_item_rep :Star + { + $$->reItem = $reg_item->reItem; + $$->reItem->star = true; + } + + ragel::reg_item_rep :Base + { + $$->reItem = $reg_item->reItem; + } + + # def reg_item + # [re_sqopen_pos reg_or_data re_or_sqclose] :PosOrBlock + # | [re_sqopen_neg reg_or_data re_or_sqclose] :NegOrBlock + # | [re_dot] :Dot + # | [re_char] :Char + ragel::reg_item + { + ReItem *reItem; + } + ragel::reg_item :PosOrBlock + { + $$->reItem = new ReItem( @1, $reg_or_data->reOrBlock, ReItem::OrBlock ); + } + ragel::reg_item :NegOrBlock + { + $$->reItem = new ReItem( @1, $reg_or_data->reOrBlock, ReItem::NegOrBlock ); + } + ragel::reg_item :Dot + { + $$->reItem = new ReItem( @1, ReItem::Dot ); + } + ragel::reg_item :Char + { + string data( $re_char->data, $re_char->length ); + char *c = unescape( data.c_str() ); + $$->reItem = new ReItem( @re_char, c, strlen(c) ); + delete[] c; + } + + # def reg_or_data + # [reg_or_data reg_or_char] :Data + # | 
[] :Base + ragel::reg_or_data + { + ReOrBlock *reOrBlock; + } + + ragel::reg_or_data :Data + { + /* An optimization to lessen the tree size. If an or char is directly + * under the left side on the right and the right side is another or + * char then paste them together and return the left side. Otherwise + * just put the two under a new or data node. */ + if ( $reg_or_char->reOrItem->type == ReOrItem::Data && + $_reg_or_data->reOrBlock->type == ReOrBlock::RecurseItem && + $_reg_or_data->reOrBlock->item->type == ReOrItem::Data ) + { + /* Append the right side to right side of the left and toss the + * right side. */ + $_reg_or_data->reOrBlock->item->data.append( $reg_or_char->reOrItem->data ); + delete $reg_or_char->reOrItem; + $$->reOrBlock = $_reg_or_data->reOrBlock; + } + else { + /* Can't optimize, put the left and right under a new node. */ + $$->reOrBlock = new ReOrBlock( $_reg_or_data->reOrBlock, $reg_or_char->reOrItem ); + } + } + + ragel::reg_or_data :Base + { + $$->reOrBlock = new ReOrBlock(); + } + + # def reg_or_char + # [re_or_char] :Char + # | [Low: re_or_char re_or_dash High: re_or_char] :Range + ragel::reg_or_char + { + ReOrItem *reOrItem; + } + + ragel::reg_or_char :Char + { + // ReOrItem *reOrItem; + char *c = unescape( $re_or_char->data, $re_or_char->length ); + $$->reOrItem = new ReOrItem( @re_or_char, c, 1 ); + delete[] c; + } + + ragel::reg_or_char :Range + { + // ReOrItem *reOrItem; + char *low = unescape( $Low->data, $Low->length ); + char *high = unescape( $High->data, $High->length ); + $$->reOrItem = new ReOrItem( @re_or_dash, low[0], high[0] ); + delete[] low; + delete[] high; + } + + + # def alphabet_num + # [uint] :Uint + # | [`- uint] :Neg + # | [hex] :Hex + ragel::alphabet_num + { + bool neg; + RedToken tok; + } + + ragel::alphabet_num :Uint + { + $$->neg = false; + $$->tok.set( $uint, @uint ); + } + + ragel::alphabet_num :Neg + { + $$->neg = true; + $$->tok.set( $uint, @1 ); + } + + ragel::alphabet_num :Hex + { + $$->neg = false; + 
$$->tok.set( $hex, @hex ); + } + + # def range_lit + # [string] :String + # | [alphabet_num] :AN + ragel::range_lit + { + Literal *literal; + } + + ragel::range_lit :String + { + /* Range literals must have only one char. We restrict this in the + * parse tree. */ + $$->literal = new Literal( @string, false, + $string->data, $string->length, Literal::LitString ); + } + + ragel::range_lit :AN + { + $$->literal = new Literal( $alphabet_num->tok.loc, + $alphabet_num->neg, $alphabet_num->tok.data, + $alphabet_num->tok.length, Literal::Number ); + } + + # def lm + # [join] :Join + # | [`|* lm_stmt_list `*|] :Lm + ragel::lm + { + MachineDef *machineDef; + } + + ragel::lm :Join + { + $$->machineDef = new MachineDef( $join->join ); + } + + ragel::lm :Lm + { + /* Create a new factor going to a longest match structure. Record in + * the parse data that we have a longest match. */ + LongestMatch *lm = new LongestMatch( @1, $lm_stmt_list->lmPartList ); + pd->lmList.append( lm ); + for ( LmPartList::Iter lmp = *$lm_stmt_list->lmPartList; lmp.lte(); lmp++ ) + lmp->longestMatch = lm; + $$->machineDef = new MachineDef( lm ); + } + + ragel::lm :LmNfa + { + /* Create a new factor going to a longest match structure. Record in + * the parse data that we have a longest match. 
*/ + LongestMatch *lm = new LongestMatch( @1, $lm_stmt_list->lmPartList ); + lm->nfaConstruction = true; + pd->lmList.append( lm ); + for ( LmPartList::Iter lmp = *$lm_stmt_list->lmPartList; lmp.lte(); lmp++ ) + lmp->longestMatch = lm; + $$->machineDef = new MachineDef( lm ); + } + + # def lm_stmt_list + # [lm_stmt_list lm_stmt] :Rec + # | [lm_stmt] :Base + ragel::lm_stmt_list + { + LmPartList *lmPartList; + } + ragel::lm_stmt_list :Rec + { + $$->lmPartList = $_lm_stmt_list->lmPartList; + if ( $lm_stmt->lmPart != 0 ) + $$->lmPartList->append( $lm_stmt->lmPart ); + } + ragel::lm_stmt_list :Base + { + $$->lmPartList = new LmPartList; + if ( $lm_stmt->lmPart != 0 ) + $$->lmPartList->append( $lm_stmt->lmPart ); + } + + + # def lm_stmt + # [join opt_lm_act `;] :LmStmt commit + # | [assignment] :Assignment + # | [action_spec] :ActionSpec + + ragel::lm_stmt + { + LongestMatchPart *lmPart; + } + + ragel::lm_stmt :LmStmt + { + InputLoc loc; + loc.line = 1; + loc.fileName = 0; + + Join *join = $join->join; + Action *action = $opt_lm_act->action; + + if ( action != 0 ) + action->isLmAction = true; + + /* Provide a location to join. Unfortunately we don't have the start + * of the join as in other occurrences, so the placeholder location + * built above (line 1, no file name) is used for both the join and + * the longest-match part. */ + join->loc = loc; + + $$->lmPart = new LongestMatchPart( join, action, + loc, pd->nextLongestMatchId++ ); + } + + ragel::lm_stmt :Assignment + { + /* Not a match part. The lm_stmt_list actions skip null parts. */ + $$->lmPart = 0; + } + + ragel::lm_stmt :ActionSpec + { + /* Not a match part. The lm_stmt_list actions skip null parts. */ + $$->lmPart = 0; + } + + # def opt_lm_act + # [lm_act] :Act + # | [] :Empty + ragel::opt_lm_act + { + Action *action; + } + + ragel::opt_lm_act :Act + { + $$->action = $lm_act->action; + } + + ragel::opt_lm_act :Empty + { + $$->action = 0; + } + + # def lm_act + # [`=> action_ref] :ActionRef + # | [action_block] :ActionBlock + ragel::lm_act + { + Action *action; + } + ragel::lm_act :ActionRef + { + $$->action = $action_ref->action; + } + ragel::lm_act :ActionBlock + { + /* Create the action, add it to the list and pass up. 
*/ + Action *newAction = new Action( &$action_block->loc, std::string(), + $action_block->inlineList, pd->fsmCtx->nextCondId++ ); + pd->fsmCtx->actionList.append( newAction ); + $$->action = newAction; + } + + # def opt_export + # [`export] :Export + # | [] :Base + ragel::opt_export + { + bool isSet; + } + + ragel::opt_export :Export + { + $$->isSet = true; + } + + ragel::opt_export :Base + { + $$->isSet = false; + } + + + # def nfa_expr + # [nfa_expr `| term] :Union + # | [term] :Base + ragel::nfa_expr + { + NfaUnion *nfaUnion; + } + + ragel::nfa_expr :Union + { + $$->nfaUnion = $_nfa_expr->nfaUnion; + $$->nfaUnion->terms.append( $term->term ); + } + + ragel::nfa_expr :Base + { + $$->nfaUnion = new NfaUnion(); + $$->nfaUnion->terms.append( $term->term ); + } + + # def nfa_round_spec + # [uint `, uint] + ragel::nfa_round_spec + { + long depth; + long group; + } + + ragel::nfa_round_spec :Spec + { + // Convert the round depth and group numbers to longs. Check each for + // overflow. The token text is copied by length first, as the + // factor_rep_num action does, rather than handing the raw data + // pointer to strtol. + string depth( $Depth->data, $Depth->length ); + errno = 0; + $$->depth = strtol( depth.c_str(), 0, 10 ); + if ( $$->depth == LONG_MAX && errno == ERANGE ) + pd->id->error(@Depth) << "rounds " << depth << " overflows" << endl; + + // Reset errno so an ERANGE left by the depth conversion cannot be + // mistaken for an overflow of the group number. + string group( $Group->data, $Group->length ); + errno = 0; + $$->group = strtol( group.c_str(), 0, 10 ); + if ( $$->group == LONG_MAX && errno == ERANGE ) + pd->id->error(@Group) << "group " << group << " overflows" << endl; + } + + # def nfa_round_list + # [nfa_round_list `, nfa_round_spec] :Recurse + # | [nfa_round_spec] :Base + ragel::nfa_round_list + { + NfaRoundVect *roundsList; + } + + ragel::nfa_round_list :Recurse + { + $$->roundsList = $_nfa_round_list->roundsList; + $$->roundsList->append( NfaRound( $nfa_round_spec->depth, + $nfa_round_spec->group ) ); + } + + ragel::nfa_round_list :Base + { + $$->roundsList = new NfaRoundVect; + $$->roundsList->append( NfaRound( $nfa_round_spec->depth, + $nfa_round_spec->group ) ); + } + + # def nfa_rounds + # [`( nfa_round_list `)] :Rounds + ragel::nfa_rounds + { + NfaRoundVect *roundsList; + } + + ragel::nfa_rounds :Rounds + { + 
$$->roundsList = $nfa_round_list->roundsList; + } + + + ragel::write_arg :Word + { + string arg( $word->data, $word->length ); + writeArgs.push_back( arg ); + } +end + +reduction TopLevel + # def inline_block + # [block_item_list] :List + inline::inline_block + { + InlineList *inlineList; + } + + inline::inline_block :List + { + $$->inlineList = $block_item_list->inlineList; + } + + # def block_item_list + # [block_item block_item_list] :Rec + # | [] :Base + inline::block_item_list + { + InlineList *inlineList; + } + + inline::block_item_list :Rec + { + $$->inlineList = $_block_item_list->inlineList; + + if ( $block_item->inlineItem != 0 ) + $$->inlineList->prepend( $block_item->inlineItem ); + else if ( $block_item->inlineList != 0 ) { + $$->inlineList->prepend( *$block_item->inlineList ); + delete $block_item->inlineList; + } + } + + inline::block_item_list :Base + { + $$->inlineList = new InlineList; + } + + # def block_item + # [expr_any] :ExprAny + # | [block_symbol] :BlockSymbol + # | [block_interpret] :BlockInterpret + # | [`{ inline_block `}] :RecBlock + inline::block_item + { + InlineItem *inlineItem; + InlineList *inlineList; + } + + inline::block_item :ExprAny + { + $$->inlineItem = $expr_any->inlineItem; + } + + inline::block_item :BlockSymbol + { + $$->inlineItem = $block_symbol->inlineItem; + } + + inline::block_item :BlockInterpret + { + $$->inlineItem = $block_interpret->inlineItem; + } + + inline::block_item :RecBlock + { + $$->inlineList = $inline_block->inlineList; + $$->inlineList->prepend( new InlineItem( @1, "{", InlineItem::Text ) ); + $$->inlineList->append( new InlineItem( @1, "}", InlineItem::Text ) ); + $$->inlineItem = 0; + } + + # def expr_any + # [whitespace] :WS + #| [comment] :Comment + #| [string] :String + #| [number] :Number + #| [hex_number] :Hex + #| [ident] :Ident + #| [c_any] :Any + inline::expr_any + { + InlineItem *inlineItem; + } + + inline::expr_any :WS + { + string data( $whitespace->data, $whitespace->length ); + 
$$->inlineItem = new InlineItem( @whitespace, data, InlineItem::Text ); + } + + inline::expr_any :Comment + { + string data( $comment->data, $comment->length ); + $$->inlineItem = new InlineItem( @comment, data, InlineItem::Text ); + } + + inline::expr_any :String + { + string data( $string->data, $string->length ); + $$->inlineItem = new InlineItem( @string, data, InlineItem::Text ); + } + + inline::expr_any :Number + { + string data( $number->data, $number->length ); + $$->inlineItem = new InlineItem( @number, data, InlineItem::Text ); + } + + inline::expr_any :Hex + { + string data( $hex_number->data, $hex_number->length ); + $$->inlineItem = new InlineItem( @hex_number, data, InlineItem::Text ); + } + + inline::expr_any :Ident + { + string data( $ident->data, $ident->length ); + $$->inlineItem = new InlineItem( @ident, data, InlineItem::Text ); + } + + inline::expr_any :Any + { + string data( $c_any->data, $c_any->length ); + $$->inlineItem = new InlineItem( @c_any, data, InlineItem::Text ); + } + + # def block_symbol + # [`,] :B1 | [`;] :B2 | [`(] :B3 | [`)] :B4 | [`*] :B5 | [`::] :B6 + inline::block_symbol + { + InlineItem *inlineItem; + } + + inline::block_symbol :B1 + { + string data( $1->data, $1->length ); + $$->inlineItem = new InlineItem( @1, data, InlineItem::Text ); + } + + inline::block_symbol :B2 + { + string data( $1->data, $1->length ); + $$->inlineItem = new InlineItem( @1, data, InlineItem::Text ); + } + + inline::block_symbol :B3 + { + string data( $1->data, $1->length ); + $$->inlineItem = new InlineItem( @1, data, InlineItem::Text ); + } + + inline::block_symbol :B4 + { + string data( $1->data, $1->length ); + $$->inlineItem = new InlineItem( @1, data, InlineItem::Text ); + } + + inline::block_symbol :B5 + { + string data( $1->data, $1->length ); + $$->inlineItem = new InlineItem( @1, data, InlineItem::Text ); + } + + inline::block_symbol :B6 + { + string data( $1->data, $1->length ); + $$->inlineItem = new InlineItem( @1, data, 
InlineItem::Text ); + } + + # def block_interpret + # [expr_interpret] :ExprInterpret + # | [`fhold whitespace? `;] :Fhold + # | [`fgoto whitespace? `* inline_expr `;] :FgotoExpr + # | [`fnext whitespace? `* inline_expr `;] :FnextExpr + # | [`fcall whitespace? `* inline_expr `;] :FcallExpr + # | [`fncall whitespace? `* inline_expr `;] :FncallExpr + # | [`fexec inline_expr `;] :Fexec + # | [`fgoto state_ref srlex::`;] :FgotoSr + # | [`fnext state_ref srlex::`;] :FnextSr + # | [`fcall state_ref srlex::`;] :FcallSr + # | [`fncall state_ref srlex::`;] :FncallSr + # | [`fret `;] :Fret + # | [`fnret `;] :Fnret + # | [`fbreak `;] :Fbreak + # | [`fnbreak `;] :Fnbreak + inline::block_interpret + { + InlineItem *inlineItem; + } + + inline::block_interpret :Fhold + { + $$->inlineItem = new InlineItem( @1, InlineItem::Hold ); + } + inline::block_interpret :FgotoExpr + { + $$->inlineItem = new InlineItem( @1, InlineItem::GotoExpr ); + $$->inlineItem->children = $inline_expr->inlineList; + } + inline::block_interpret :FnextExpr + { + $$->inlineItem = new InlineItem( @1, InlineItem::NextExpr ); + $$->inlineItem->children = $inline_expr->inlineList; + } + inline::block_interpret :FcallExpr + { + $$->inlineItem = new InlineItem( @1, InlineItem::CallExpr ); + $$->inlineItem->children = $inline_expr->inlineList; + } + inline::block_interpret :FncallExpr + { + $$->inlineItem = new InlineItem( @1, InlineItem::NcallExpr ); + $$->inlineItem->children = $inline_expr->inlineList; + } + inline::block_interpret :Fexec + { + $$->inlineItem = new InlineItem( @1, InlineItem::Exec ); + $$->inlineItem->children = $inline_expr->inlineList; + } + inline::block_interpret :FgotoSr + { + $$->inlineItem = new InlineItem( @1, + $state_ref->nameRef, InlineItem::Goto ); + } + inline::block_interpret :FnextSr + { + $$->inlineItem = new InlineItem( @1, + $state_ref->nameRef, InlineItem::Next ); + } + inline::block_interpret :FcallSr + { + $$->inlineItem = new InlineItem( @1, + $state_ref->nameRef, 
InlineItem::Call ); + } + inline::block_interpret :FncallSr + { + $$->inlineItem = new InlineItem( @1, + $state_ref->nameRef, InlineItem::Ncall ); + } + inline::block_interpret :Fret + { + $$->inlineItem = new InlineItem( @1, InlineItem::Ret ); + } + inline::block_interpret :Fnret + { + $$->inlineItem = new InlineItem( @1, InlineItem::Nret ); + } + inline::block_interpret :Fbreak + { + $$->inlineItem = new InlineItem( @1, InlineItem::Break ); + } + inline::block_interpret :Fnbreak + { + $$->inlineItem = new InlineItem( @1, InlineItem::Nbreak ); + } + + inline::block_interpret :ExprInterpret + { + $$->inlineItem = $expr_interpret->inlineItem; + } + + # def inline_expr + # [expr_item_list] :List + inline::inline_expr + { + InlineList *inlineList; + } + + inline::inline_expr :List + { + $$->inlineList = $expr_item_list->inlineList; + } + + # def expr_item_list + # [expr_item_list expr_item] :Rec + # | [] :Empty + inline::expr_item_list + { + InlineList *inlineList; + } + + inline::expr_item_list :Rec + { + $$->inlineList = $_expr_item_list->inlineList; + $$->inlineList->append( $expr_item->inlineItem ); + } + + inline::expr_item_list :Empty + { + $$->inlineList = new InlineList; + } + + # def expr_item + # [expr_any] :ExprAny + # | [expr_symbol] :ExprSymbol + # | [expr_interpret] :ExprInterpret + inline::expr_item + { + InlineItem *inlineItem; + } + + inline::expr_item :ExprAny + { + $$->inlineItem = $expr_any->inlineItem; + } + inline::expr_item :ExprSymbol + { + string sym( $expr_symbol->sym ); + $$->inlineItem = new InlineItem( &$expr_symbol->loc, sym, InlineItem::Text ); + } + inline::expr_item :ExprInterpret + { + $$->inlineItem = $expr_interpret->inlineItem; + } + + # def expr_symbol + # [`,] | [`(] | [`)] | [`*] | [`::] + inline::expr_symbol + { + const char *sym; + colm_location loc; + } + + inline::expr_symbol :Comma + { $$->loc = *@1; $$->sym = ","; } + inline::expr_symbol :Open + { $$->loc = *@1; $$->sym = "("; } + inline::expr_symbol :Close + { $$->loc = 
*@1; $$->sym = ")"; } + inline::expr_symbol :Star + { $$->loc = *@1; $$->sym = "*"; } + inline::expr_symbol :DoubleColon + { $$->loc = *@1; $$->sym = "::"; } + + + # def expr_interpret + # [`fpc] :Fpc + # | [`fc] :Fc + # | [`fcurs] :Fcurs + # | [`ftargs] :Ftargs + # | [`fentry `( state_ref srlex::`)] :Fentry + # | [var_ref] :VarRef + inline::expr_interpret + { + InlineItem *inlineItem; + } + + inline::expr_interpret :Fpc + { + $$->inlineItem = new InlineItem( @1, InlineItem::PChar ); + } + + inline::expr_interpret :Fc + { + $$->inlineItem = new InlineItem( @1, InlineItem::Char ); + } + + inline::expr_interpret :Fcurs + { + $$->inlineItem = new InlineItem( @1, InlineItem::Curs ); + } + + inline::expr_interpret :Ftargs + { + $$->inlineItem = new InlineItem( @1, InlineItem::Targs ); + } + + inline::expr_interpret :Fentry + { + $$->inlineItem = new InlineItem( @1, $state_ref->nameRef, InlineItem::Entry ); + } + + inline::expr_interpret :VarRef + { + /* The parameter name is the token text with its first character + * dropped. */ + string data( $1->data + 1, $1->length - 1 ); + $$->inlineItem = new InlineItem( @1, InlineItem::Subst ); + + /* Look the name up in the parameter list. The iterator is left on + * the matching entry, or at the end when there is no match. */ + ActionParamList::Iter api = *paramList; + for ( ; api.lte(); api++ ) { + if ( (*api)->name == data ) + break; + } + + /* Report the stripped name. The message supplies the "$" itself, so + * printing the raw token text would repeat its leading character and + * would stream the buffer without regard to the token length. */ + if ( api.end() ) + pd->id->error( @1 ) << "invalid parameter reference \"$" << data << "\"" << endl; + else { + $$->inlineItem->substPos = api.pos(); + } + } + + host::section :MultiLine + { + if ( !isImport && includeDepth == 0 ) { + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::EndSection; + inputItem->loc = @5; + id->inputItems.append( inputItem ); + + if ( section != 0 ) { + inputItem->section = section; + section->lastReference = inputItem; + } + + /* The end section may include a newline on the end, so + * we use the last line, which will count the newline. */ + inputItem = new InputItem; + inputItem->type = InputItem::HostData; + inputItem->loc.fileName = 0; + + id->inputItems.append( inputItem ); + + /* Record the parse data and move over the end section. 
*/ + id->curItem = id->curItem->next; + id->curItem->pd = pd; + + /* Move over the host data. */ + id->curItem = id->curItem->next; + } + } + + host::section :Token + { + if ( !isImport && includeDepth == 0 ) { + if ( id->curItem->loc.fileName == 0 ) + id->curItem->loc = @1; + + head_t *head = tree_to_str( prg, sp, $*1, false, false ); + id->curItem->data.write( head->data, head->length ); + } + } +end diff --git a/ragel/rlscan.h b/ragel/rlscan.h new file mode 100644 index 00000000..e8b4047d --- /dev/null +++ b/ragel/rlscan.h @@ -0,0 +1,136 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _RLSCAN_H +#define _RLSCAN_H + +#include <iostream> +#include "rlscan.h" +#include "vector.h" +#ifdef WITH_RAGEL_KELBT +#include "rlparse.h" +#endif +#include "parsedata.h" +#include "avltree.h" +#include "vector.h" + +using std::istream; +using std::ostream; + +extern char *Parser6_lelNames[]; +struct Section; + +struct Scanner +{ + Scanner( InputData *id, const char *fileName, istream &input, + Parser6 *inclToParser, char *inclSectionTarg, + int includeDepth, bool importMachines ) + : + id(id), fileName(fileName), + input(input), + inclToParser(inclToParser), + inclSectionTarg(inclSectionTarg), + includeDepth(includeDepth), + importMachines(importMachines), + cur_token(0), + line(1), column(1), lastnl(0), + parser(0), ignoreSection(false), + parserExistsError(false), + whitespaceOn(true), + lastToken(0), + section(0), + sectionPass(false) + {} + + void handleMachine(); + void handleInclude(); + void handleImport(); + + void init(); + void token( int type, char *start, char *end ); + void token( int type, char c ); + void token( int type ); + void processToken( int type, char *tokdata, int toklen ); + void directToParser( Parser6 *toParser, const char *tokFileName, int tokLine, + int tokColumn, int type, char *tokdata, int toklen ); + void flushImport( ); + void importToken( int type, char *start, char *end ); + void pass( int token, char *start, char *end ); + void pass(); + void updateCol(); + void startSection(); + void endSection(); + void do_scan(); + bool active(); + InputLoc scan_loc(); + + InputData *id; + const char *fileName; + istream &input; + Parser6 *inclToParser; + char *inclSectionTarg; + int includeDepth; + bool importMachines; + + /* For import parsing. */ + int tok_cs, tok_act; + int *tok_ts, *tok_te; + int cur_token; + static const int max_tokens = 32; + int token_data[max_tokens]; + char *token_strings[max_tokens]; + int token_lens[max_tokens]; + + /* For section processing. 
*/ + int cs; + char *word, *lit; + int word_len, lit_len; + + /* For character scanning. */ + int line; + InputLoc sectionLoc; + char *ts, *te; + int column; + char *lastnl; + + /* Set by machine statements, these persist from section to section + * allowing for unnamed sections. */ + Parser6 *parser; + bool ignoreSection; + + /* This is set if ragel has already emitted an error stating that + * no section name has been seen and thus no parser exists. */ + bool parserExistsError; + + /* This is for inline code. By default it is on. It goes off for + * statements and values in inline blocks which are parsed. */ + bool whitespaceOn; + + /* Keeps a record of the previous token sent to the section parser. */ + int lastToken; + + Section *section; + bool sectionPass; + +}; + +#endif diff --git a/ragel/rlscan.rl b/ragel/rlscan.rl new file mode 100644 index 00000000..f745b9a0 --- /dev/null +++ b/ragel/rlscan.rl @@ -0,0 +1,1193 @@ +/* + * Copyright 2006-2007 Adrian Thurston <thurston@colm.net> + * Copyright 2011 Josef Goettgens + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <iostream> +#include <fstream> +#include <string.h> + +#include "ragel.h" +#include "rlscan.h" +#include "inputdata.h" + +//#define LOG_TOKENS + +using std::ifstream; +using std::istream; +using std::ostream; +using std::endl; + +enum InlineBlockType +{ + CurlyDelimited, + SemiTerminated +}; + +char *newTokdata( int toklen ) +{ + char *tokdata = new char[sizeof(TokHead) + toklen + 1]; + return tokdata + sizeof(TokHead); +} + +void deleteTokdata( char *tokdata ) +{ + if ( tokdata ) + delete[] ( tokdata - sizeof(TokHead) ); +} + +void linkTokdata( Parser6 *parser, char *tokdata ) +{ + TokHead *head = (TokHead*)( tokdata - sizeof(TokHead) ); + head->next = parser->tokHead; + parser->tokHead = head; +} + +void clearTokdata( Parser6 *parser ) +{ + while ( parser->tokHead != 0 ) { + TokHead *next = parser->tokHead->next; + delete[] (char*)parser->tokHead; + parser->tokHead = next; + } +} + +/* + * The Scanner for Importing + */ + +%%{ + machine inline_token_scan; + alphtype int; + access tok_; + + # Import scanner tokens. + import "rlparse.h"; + + main := |* + # Define of number. + IMP_Define IMP_Word IMP_UInt => { + int base = tok_ts - token_data; + int nameOff = 1; + int numOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_UInt, + token_strings[base+numOff], token_lens[base+numOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Assignment of number. 
+ IMP_Word '=' IMP_UInt => { + int base = tok_ts - token_data; + int nameOff = 0; + int numOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_UInt, + token_strings[base+numOff], token_lens[base+numOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Define of literal. + IMP_Define IMP_Word IMP_Literal => { + int base = tok_ts - token_data; + int nameOff = 1; + int litOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_Literal, + token_strings[base+litOff], token_lens[base+litOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Assignment of literal. + IMP_Word '=' IMP_Literal => { + int base = tok_ts - token_data; + int nameOff = 0; + int litOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_Literal, + token_strings[base+litOff], token_lens[base+litOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Catch everything else. 
+ any; + *|; +}%% + +%% write data; + +void Scanner::flushImport() +{ + int *p = token_data; + int *pe = token_data + cur_token; + int *eof = 0; + + %%{ + machine inline_token_scan; + write init; + write exec; + }%% + + if ( tok_ts == 0 ) + cur_token = 0; + else { + cur_token = pe - tok_ts; + int ts_offset = tok_ts - token_data; + memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) ); + memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) ); + memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) ); + } +} + +void Scanner::directToParser( Parser6 *toParser, const char *tokFileName, int tokLine, + int tokColumn, int type, char *tokdata, int toklen ) +{ + InputLoc loc; + + #ifdef LOG_TOKENS + cerr << "scanner:" << tokLine << ":" << tokColumn << + ": sending token to the parser " << Parser6_lelNames[type]; + cerr << " " << toklen; + if ( tokdata != 0 ) + cerr << " " << tokdata; + cerr << endl; + #endif + + loc.fileName = tokFileName; + loc.line = tokLine; + loc.col = tokColumn; + + toParser->token( loc, type, tokdata, toklen ); +} + +void Scanner::importToken( int token, char *start, char *end ) +{ + if ( cur_token == max_tokens ) + flushImport(); + + token_data[cur_token] = token; + if ( start == 0 ) { + token_strings[cur_token] = 0; + token_lens[cur_token] = 0; + } + else { + int toklen = end-start; + token_lens[cur_token] = toklen; + token_strings[cur_token] = new char[toklen+1]; + memcpy( token_strings[cur_token], start, toklen ); + token_strings[cur_token][toklen] = 0; + } + cur_token++; +} + +void Scanner::pass() +{ + if ( sectionPass ) + return; + + updateCol(); + + /* If no errors and we are at the bottom of the include stack (the + * source file listed on the command line) then write out the data. 
*/ + if ( includeDepth == 0 && id->machineSpec == 0 && id->machineName == 0 ) + id->curItem->data.write( ts, te-ts ); +} + +void Scanner::pass( int token, char *start, char *end ) +{ + if ( sectionPass ) + return; + + if ( importMachines ) + importToken( token, start, end ); + + pass(); +} + +/* + * The scanner for processing sections, includes, imports, etc. + */ + +%%{ + machine section_parse; + alphtype int; + write data; +}%% + +void Scanner::init( ) +{ + %% write init; +} + +bool Scanner::active() +{ + if ( ignoreSection ) + return false; + + if ( parser == 0 && ! parserExistsError ) { + id->error(scan_loc()) << "this specification has no name, nor does any previous" + " specification" << endl; + parserExistsError = true; + } + + if ( parser == 0 ) + return false; + + return true; +} + +InputLoc Scanner::scan_loc() +{ + return makeInputLoc( fileName, line, column ); +} + +void Scanner::updateCol() +{ + char *from = lastnl; + if ( from == 0 ) + from = ts; + column += te - from; + lastnl = 0; +} + +void Scanner::handleMachine() +{ + if ( sectionPass ) { + /* Assign a name to the machine. */ + char *machine = word; + + SectionDictEl *sdEl = id->sectionDict.find( machine ); + if ( sdEl == 0 ) { + sdEl = new SectionDictEl( machine ); + sdEl->value = new Section( machine ); + id->sectionDict.insert( sdEl ); + } + + section = sdEl->value; + } + else { + + /* Assign a name to the machine. */ + char *machine = word; + + if ( !importMachines && inclSectionTarg == 0 ) { + ignoreSection = false; + + ParserDictEl *pdEl = id->parserDict.find( machine ); + if ( pdEl == 0 ) { + pdEl = new ParserDictEl( machine ); + pdEl->value = new Parser6( id, fileName, machine, sectionLoc, + id->hostLang, id->minimizeLevel, id->minimizeOpt ); + pdEl->value->init(); + id->parserDict.insert( pdEl ); + id->parserList.append( pdEl->value ); + + /* Also into the parse data dict. This is the new style. 
*/ + ParseDataDictEl *pddEl = new ParseDataDictEl( machine ); + pddEl->value = pdEl->value->pd; + id->parseDataDict.insert( pddEl ); + id->parseDataList.append( pddEl->value ); + } + + parser = pdEl->value; + } + else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) { + /* found include target */ + ignoreSection = false; + parser = inclToParser; + } + else { + /* ignoring section */ + ignoreSection = true; + parser = 0; + } + } +} + +void Scanner::handleInclude() +{ + if ( sectionPass ) + return; + + if ( active() ) { + char *inclSectionName = word; + const char **includeChecks = 0; + + /* Implement defaults for the input file and section name. */ + if ( inclSectionName == 0 ) + inclSectionName = parser->sectionName; + + if ( lit != 0 ) { + long length = 0; + bool caseInsensitive = false; + char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive ); + + includeChecks = parser->pd->id->makeIncludePathChecks( fileName, data ); + } + else { + char *test = new char[strlen(fileName)+1]; + strcpy( test, fileName ); + + includeChecks = new const char*[2]; + + includeChecks[0] = test; + includeChecks[1] = 0; + } + + long found = 0; + ifstream *inFile = parser->pd->id->tryOpenInclude( includeChecks, found ); + if ( inFile == 0 ) { + id->error(scan_loc()) << "include: failed to locate file" << endl; + const char **tried = includeChecks; + while ( *tried != 0 ) + id->error(scan_loc()) << "include: attempted: \"" << *tried++ << '\"' << endl; + } + else { + /* Don't include anything that's already been included. 
*/ + if ( !parser->pd->duplicateInclude( includeChecks[found], inclSectionName ) ) { + parser->pd->includeHistory.push_back( IncludeHistoryItem( + includeChecks[found], inclSectionName ) ); + + Scanner scanner( id, includeChecks[found], *inFile, parser, + inclSectionName, includeDepth+1, false ); + scanner.do_scan( ); + } + + delete inFile; + } + } +} + +void Scanner::handleImport() +{ + if ( sectionPass ) + return; + + if ( active() ) { + long length = 0; + bool caseInsensitive = false; + char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive ); + + const char **importChecks = parser->pd->id->makeIncludePathChecks( fileName, data ); + + /* Open the input file for reading. */ + long found = 0; + ifstream *inFile = parser->pd->id->tryOpenInclude( importChecks, found ); + if ( inFile == 0 ) { + id->error(scan_loc()) << "import: could not open import file " << + "for reading" << endl; + const char **tried = importChecks; + while ( *tried != 0 ) + id->error(scan_loc()) << "import: attempted: \"" << *tried++ << '\"' << endl; + } + + Scanner scanner( id, importChecks[found], *inFile, parser, + 0, includeDepth+1, true ); + scanner.do_scan( ); + scanner.importToken( 0, 0, 0 ); + scanner.flushImport(); + delete inFile; + } +} + +%%{ + machine section_parse; + + # Need the defines representing tokens. 
+ import "rlparse.h"; + + action clear_words { word = lit = 0; word_len = lit_len = 0; } + action store_word { word = tokdata; word_len = toklen; } + action store_lit { lit = tokdata; lit_len = toklen; } + + action mach_err { id->error(scan_loc()) << "bad machine statement" << endl; } + action incl_err { id->error(scan_loc()) << "bad include statement" << endl; } + action import_err { id->error(scan_loc()) << "bad import statement" << endl; } + action write_err { id->error(scan_loc()) << "bad write statement" << endl; } + + action handle_machine { handleMachine(); } + action handle_include { handleInclude(); } + action handle_import { handleImport(); } + + machine_stmt = + ( KW_Machine TK_Word @store_word ';' ) @handle_machine + <>err mach_err <>eof mach_err; + + include_names = ( + TK_Word @store_word ( TK_Literal @store_lit )? | + TK_Literal @store_lit + ) >clear_words; + + include_stmt = + ( KW_Include include_names ';' ) @handle_include + <>err incl_err <>eof incl_err; + + import_stmt = + ( KW_Import TK_Literal @store_lit ';' ) @handle_import + <>err import_err <>eof import_err; + + action write_command + { + if ( sectionPass ) { + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::Write; + inputItem->loc.fileName = fileName; + inputItem->loc.line = line; + inputItem->loc.col = column; + inputItem->name = section->sectionName; + inputItem->section = section; + + /* Track the last reference. 
*/ + inputItem->section->lastReference = inputItem; + + id->inputItems.append( inputItem ); + } + else { + if ( includeDepth == 0 && active() && + id->machineSpec == 0 && id->machineName == 0 ) + { + id->curItem = id->curItem->next; + id->curItem->pd = parser->pd; + id->curItem->parser = parser; + id->checkLastRef( id->curItem ); + } + } + } + + action write_arg + { + if ( sectionPass ) { + } + else { + if ( active() && id->machineSpec == 0 && id->machineName == 0 ) + id->curItem->writeArgs.push_back( strdup(tokdata) ); + } + } + + action write_close + { + if ( sectionPass ) { + } + else { + /* if ( active() && id->machineSpec == 0 && id->machineName == 0 ) + * id->curItem->writeArgs.append( 0 ); */ + } + } + + write_stmt = + ( KW_Write @write_command + ( TK_Word @write_arg )+ ';' @write_close ) + <>err write_err <>eof write_err; + + action handle_token + { + if ( sectionPass ) { + deleteTokdata( tokdata ); + } + else { + /* Send the token off to the parser. */ + if ( active() ) { + if ( tokdata != 0 ) { + linkTokdata( parser, tokdata ); + } + + directToParser( parser, fileName, line, column, type, tokdata, toklen ); + } + else { + deleteTokdata( tokdata ); + } + } + } + + # Catch everything else. 
+ everything_else = + ^( KW_Machine | KW_Include | KW_Import | KW_Write ) @handle_token; + + main := ( + machine_stmt | + include_stmt | + import_stmt | + write_stmt | + everything_else + )*; +}%% + +void Scanner::token( int type, char c ) +{ + token( type, &c, &c + 1 ); +} + +void Scanner::token( int type ) +{ + token( type, 0, 0 ); +} + + +void Scanner::token( int type, char *start, char *end ) +{ + char *tokdata = 0; + int toklen = 0; + if ( start != 0 ) { + toklen = end-start; + tokdata = newTokdata( toklen + 1 ); + memcpy( tokdata, start, toklen ); + tokdata[toklen] = 0; + } + + processToken( type, tokdata, toklen ); +} + +void Scanner::processToken( int type, char *tokdata, int toklen ) +{ + int *p, *pe, *eof; + + if ( type < 0 ) + p = pe = eof = 0; + else { + p = &type; + pe = &type + 1; + eof = 0; + } + + %%{ + machine section_parse; + write exec; + }%% + + updateCol(); + + /* Record the last token for use in controlling the scan of subsequent + * tokens. */ + lastToken = type; +} + +void Scanner::startSection( ) +{ + parserExistsError = false; + + sectionLoc.fileName = fileName; + sectionLoc.line = line; + sectionLoc.col = column; +} + +void Scanner::endSection( ) +{ + /* Execute the eof actions for the section parser. */ + processToken( -1, 0, 0 ); + + if ( sectionPass ) { + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::EndSection; + inputItem->loc.fileName = fileName; + inputItem->loc.line = line; + inputItem->loc.col = column; + id->inputItems.append( inputItem ); + if ( section != 0 ) { + inputItem->section = section; + section->lastReference = inputItem; + } + + if ( includeDepth == 0 ) { + if ( id->machineSpec == 0 && id->machineName == 0 ) { + /* The end section may include a newline on the end, so + * we use the last line, which will count the newline. 
*/ + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::HostData; + inputItem->loc.fileName = fileName; + inputItem->loc.line = line; + inputItem->loc.col = column; + id->inputItems.append( inputItem ); + } + } + } + else { + /* Close off the section with the parser. */ + if ( includeDepth == 0 && active() ) { + InputLoc loc; + loc.fileName = fileName; + loc.line = line; + loc.col = column; + + parser->token( loc, TK_EndSection, 0, 0 ); + + id->curItem = id->curItem->next; + + if ( parser != 0 ) { + id->curItem->pd = parser->pd; + id->curItem->parser = parser; + } + + id->checkLastRef( id->curItem ); + } + + if ( includeDepth == 0 ) { + if ( id->machineSpec == 0 && id->machineName == 0 ) { + id->curItem = id->curItem->next; + id->checkLastRef( id->curItem ); + } + } + } +} + +%%{ + machine rlscan; + + # This is sent by the driver code. + EOF = 0; + + action inc_nl { + lastnl = p; + column = 0; + line++; + } + NL = '\n' @inc_nl; + + # Identifiers, numbers, comments, and other common things. + ident = ( alpha | '_' ) ( alpha |digit |'_' )*; + ocaml_ident = ( alpha | '_' ) ( alpha |digit |'_' )* "'"?; + number = digit+; + hex_number = '0x' [0-9a-fA-F]+; + + c_comment = + '/*' ( any | NL )* :>> '*/'; + + cpp_comment = + '//' [^\n]* NL; + + c_cpp_comment = c_comment | cpp_comment; + + ruby_comment = '#' [^\n]* NL; + + # These literal forms are common to host code and ragel. + s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'"; + d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"'; + host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/'; + + whitespace = [ \t] | NL; + pound_comment = '#' [^\n]* NL; + + # An inline block of code for languages other than Ruby. + inline_code := |* + # Inline expression keywords.
+ "fpc" => { token( KW_PChar ); }; + "fc" => { token( KW_Char ); }; + "fcurs" => { token( KW_CurState ); }; + "ftargs" => { token( KW_TargState ); }; + "fentry" => { + whitespaceOn = false; + token( KW_Entry ); + }; + + # Inline statement keywords. + "fhold" => { + whitespaceOn = false; + token( KW_Hold ); + }; + "fexec" => { token( KW_Exec, 0, 0 ); }; + "fgoto" => { + whitespaceOn = false; + token( KW_Goto ); + }; + "fnext" => { + whitespaceOn = false; + token( KW_Next ); + }; + "fcall" => { + whitespaceOn = false; + token( KW_Call ); + }; + "fret" => { + whitespaceOn = false; + token( KW_Ret ); + }; + "fbreak" => { + whitespaceOn = false; + token( KW_Break ); + }; + "fncall" => { + whitespaceOn = false; + token( KW_Ncall ); + }; + "fnret" => { + whitespaceOn = false; + token( KW_Nret ); + }; + "fnbreak" => { + whitespaceOn = false; + token( KW_Nbreak ); + }; + + ident => { token( TK_Word, ts, te ); }; + + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; + + ( s_literal | d_literal ) + => { token( IL_Literal, ts, te ); }; + + whitespace+ => { + if ( whitespaceOn ) + token( IL_WhiteSpace, ts, te ); + }; + + c_cpp_comment => { token( IL_Comment, ts, te ); }; + + "::" => { token( TK_NameSep, ts, te ); }; + + # Some symbols need to go to the parser as with their cardinal value as + # the token type (as opposed to being sent as anonymous symbols) + # because they are part of the sequences which we interpret. The * ) ; + # symbols cause whitespace parsing to come back on. This gets turned + # off by some keywords. 
+ + ";" => { + whitespaceOn = true; + token( *ts, ts, te ); + if ( inlineBlockType == SemiTerminated ) + fret; + }; + + "$" [a-zA-Z_][a-zA-Z_0-9]* => { + if ( parser != 0 && parser->parseSubstitutions ) + token( TK_SubstRef, ts+1, te ); + else { + token( IL_Symbol, ts, ts+1 ); + fexec ts+1; + } + }; + + [*)] => { + whitespaceOn = true; + token( *ts, ts, te ); + }; + + [,(] => { token( *ts, ts, te ); }; + + '{' => { + token( IL_Symbol, ts, te ); + curly_count += 1; + }; + + '}' => { + if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) { + /* Inline code block ends. */ + token( '}' ); + fret; + } + else { + /* Either a semi terminated inline block or only the closing + * brace of some inner scope, not the block's closing brace. */ + token( IL_Symbol, ts, te ); + } + }; + + EOF => { + id->error(scan_loc()) << "unterminated code block" << endl; + }; + + # Send every other character as a symbol. + any => { token( IL_Symbol, ts, te ); }; + *|; + + or_literal := |* + # Escape sequences in OR expressions. + '\\0' => { token( RE_Char, '\0' ); }; + '\\a' => { token( RE_Char, '\a' ); }; + '\\b' => { token( RE_Char, '\b' ); }; + '\\t' => { token( RE_Char, '\t' ); }; + '\\n' => { token( RE_Char, '\n' ); }; + '\\v' => { token( RE_Char, '\v' ); }; + '\\f' => { token( RE_Char, '\f' ); }; + '\\r' => { token( RE_Char, '\r' ); }; + '\\\n' => { updateCol(); }; + '\\' any => { token( RE_Char, ts+1, te ); }; + + # Range dash in an OR expression. + '-' => { token( RE_Dash, 0, 0 ); }; + + # Terminate an OR expression. + ']' => { token( RE_SqClose ); fret; }; + + EOF => { + id->error(scan_loc()) << "unterminated OR literal" << endl; + }; + + # Characters in an OR expression. + [^\]] => { token( RE_Char, ts, te ); }; + + *|; + + ragel_re_literal := |* + # Escape sequences in regular expressions. 
+ '\\0' => { token( RE_Char, '\0' ); }; + '\\a' => { token( RE_Char, '\a' ); }; + '\\b' => { token( RE_Char, '\b' ); }; + '\\t' => { token( RE_Char, '\t' ); }; + '\\n' => { token( RE_Char, '\n' ); }; + '\\v' => { token( RE_Char, '\v' ); }; + '\\f' => { token( RE_Char, '\f' ); }; + '\\r' => { token( RE_Char, '\r' ); }; + '\\\n' => { updateCol(); }; + '\\' any => { token( RE_Char, ts+1, te ); }; + + # Terminate a regular expression: the closing slash with an optional + # trailing 'i'. Scanning then returns to parser_def. + '/' [i]? => { + token( RE_Slash, ts, te ); + fgoto parser_def; + }; + + # Special characters. + '.' => { token( RE_Dot ); }; + '*' => { token( RE_Star ); }; + + '[' => { token( RE_SqOpen ); fcall or_literal; }; + '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; + + EOF => { + id->error(scan_loc()) << "unterminated regular expression" << endl; + }; + + # Characters in a regular expression. + [^\/] => { token( RE_Char, ts, te ); }; + *|; + + # We need a separate token space here to avoid the ragel keywords. + write_statement := |* + ident => { token( TK_Word, ts, te ); } ; + [ \t\n]+ => { updateCol(); }; + ';' => { token( ';' ); fgoto parser_def; }; + + EOF => { + id->error(scan_loc()) << "unterminated write statement" << endl; + }; + *|; + + # Parser definitions. + parser_def := |* + #'length_cond' => { token( KW_Length ); }; + 'machine' => { token( KW_Machine ); }; + 'include' => { token( KW_Include ); }; + 'import' => { token( KW_Import ); }; + 'write' => { + token( KW_Write ); + fgoto write_statement; + }; + 'action' => { token( KW_Action ); }; + 'alphtype' => { token( KW_AlphType ); }; + 'prepush' => { token( KW_PrePush ); }; + 'postpop' => { token( KW_PostPop ); }; + + 'nfaprepush' => { token( KW_NfaPrePush ); }; + 'nfapostpop' => { token( KW_NfaPostPop ); }; + + # FIXME: Enable this post 5.17.
+ # 'range' => { token( KW_Range ); }; + + 'getkey' => { + token( KW_GetKey ); + inlineBlockType = SemiTerminated; + fcall inline_code; + }; + 'access' => { + token( KW_Access ); + inlineBlockType = SemiTerminated; + fcall inline_code; + }; + 'variable' => { + token( KW_Variable ); + inlineBlockType = SemiTerminated; + fcall inline_code; + }; + 'when' => { token( KW_When ); }; + 'inwhen' => { token( KW_InWhen ); }; + 'outwhen' => { token( KW_OutWhen ); }; + 'eof' => { token( KW_Eof ); }; + 'err' => { token( KW_Err ); }; + 'lerr' => { token( KW_Lerr ); }; + 'to' => { token( KW_To ); }; + 'from' => { token( KW_From ); }; + 'export' => { token( KW_Export ); }; + + # Identifiers. + ident => { token( TK_Word, ts, te ); } ; + + # Numbers + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; + + # Literals, with optionals. + ( s_literal | d_literal ) [i]? + => { token( TK_Literal, ts, te ); }; + + '[' => { token( RE_SqOpen ); fcall or_literal; }; + '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; + + '/' => { token( RE_Slash ); fgoto ragel_re_literal; }; + + # Ignore. + pound_comment => { updateCol(); }; + + ':=' => { token( TK_ColonEquals ); }; + '|=' => { token( TK_BarEquals ); }; + + # To State Actions. + ">~" => { token( TK_StartToState ); }; + "$~" => { token( TK_AllToState ); }; + "%~" => { token( TK_FinalToState ); }; + "<~" => { token( TK_NotStartToState ); }; + "@~" => { token( TK_NotFinalToState ); }; + "<>~" => { token( TK_MiddleToState ); }; + + # From State actions + ">*" => { token( TK_StartFromState ); }; + "$*" => { token( TK_AllFromState ); }; + "%*" => { token( TK_FinalFromState ); }; + "<*" => { token( TK_NotStartFromState ); }; + "@*" => { token( TK_NotFinalFromState ); }; + "<>*" => { token( TK_MiddleFromState ); }; + + # EOF Actions. 
+ ">/" => { token( TK_StartEOF ); }; + "$/" => { token( TK_AllEOF ); }; + "%/" => { token( TK_FinalEOF ); }; + "</" => { token( TK_NotStartEOF ); }; + "@/" => { token( TK_NotFinalEOF ); }; + "<>/" => { token( TK_MiddleEOF ); }; + + # Global Error actions. + ">!" => { token( TK_StartGblError ); }; + "$!" => { token( TK_AllGblError ); }; + "%!" => { token( TK_FinalGblError ); }; + "<!" => { token( TK_NotStartGblError ); }; + "@!" => { token( TK_NotFinalGblError ); }; + "<>!" => { token( TK_MiddleGblError ); }; + + # Local error actions. + ">^" => { token( TK_StartLocalError ); }; + "$^" => { token( TK_AllLocalError ); }; + "%^" => { token( TK_FinalLocalError ); }; + "<^" => { token( TK_NotStartLocalError ); }; + "@^" => { token( TK_NotFinalLocalError ); }; + "<>^" => { token( TK_MiddleLocalError ); }; + + # Middle. + "<>" => { token( TK_Middle ); }; + + # Conditions. + '>?' => { token( TK_StartCond ); }; + '$?' => { token( TK_AllCond ); }; + '%?' => { token( TK_LeavingCond ); }; + + '..' => { token( TK_DotDot ); }; + '../i' => { token( TK_DotDotIndep ); }; + + '**' => { token( TK_StarStar ); }; + '--' => { token( TK_DashDash ); }; + '->' => { token( TK_Arrow ); }; + '=>' => { token( TK_DoubleArrow ); }; + + ":>" => { token( TK_ColonGt ); }; + ":>>" => { token( TK_ColonGtGt ); }; + "<:" => { token( TK_LtColon ); }; + + ":nfa(" => { token( TK_ColonNfaOpen ); }; + ":cond(" => { token( TK_ColonCondOpen ); }; + ":condstar(" => { token( TK_ColonCondStarOpen ); }; + ":condplus(" => { token( TK_ColonCondPlusOpen ); }; + ":nomax(" => { token( TK_ColonNoMaxOpen ); }; + "):" => { token( TK_CloseColon ); }; + + # Opening of longest match. + "|*" => { token( TK_BarStar ); }; + + # Separater for name references. + "::" => { token( TK_NameSep, ts, te ); }; + + '}%%' => { + updateCol(); + endSection(); + fret; + }; + + [ \t\r]+ => { updateCol(); }; + + # If we are in a single line machine then newline may end the spec. 
+ NL => { + updateCol(); + if ( singleLineSpec ) { + endSection(); + fret; + } + }; + + '{' => { + if ( lastToken == KW_Export || lastToken == KW_Entry ) + token( '{' ); + else { + token( '{' ); + curly_count = 1; + inlineBlockType = CurlyDelimited; + fcall inline_code; + } + }; + + EOF => { + id->error(scan_loc()) << "unterminated ragel section" << endl; + }; + + any => { token( *ts ); } ; + *|; + + # Outside code scanner. These tokens get passed through. + main := |* + 'define' => { pass( IMP_Define, 0, 0 ); }; + ident => { pass( IMP_Word, ts, te ); }; + number => { pass( IMP_UInt, ts, te ); }; + c_cpp_comment => { pass(); }; + ( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); }; + + '%%{' => { + updateCol(); + singleLineSpec = false; + startSection(); + fcall parser_def; + }; + '%%' => { + updateCol(); + singleLineSpec = true; + startSection(); + fcall parser_def; + }; + whitespace+ => { pass(); }; + EOF; + any => { pass( *ts, 0, 0 ); }; + *|; +}%% + +%% write data; + +void Scanner::do_scan() +{ + int bufsize = 8; + char *buf = new char[bufsize]; + int cs, act, have = 0; + int top; + + /* The stack is two deep, one level for going into ragel defs from the main + * machines which process outside code, and another for going into or literals + * from either a ragel spec, or a regular expression. */ + int stack[2]; + int curly_count = 0; + bool execute = true; + bool singleLineSpec = false; + InlineBlockType inlineBlockType = CurlyDelimited; + + line = 1; + column = 1; + lastnl = 0; + + /* Init the section parser and the character scanner. */ + init(); + %% write init; + + /* Set up the start state. FIXME: After 5.20 is released the nocs write + * init option should be used, the main machine eliminated and this statement moved + * above the write init. */ + cs = rlscan_en_main; + + while ( execute ) { + char *p = buf + have; + int space = bufsize - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. Grow it. 
*/ + bufsize = bufsize * 2; + char *newbuf = new char[bufsize]; + + /* Recompute p and space. */ + p = newbuf + have; + space = bufsize - have; + + /* Patch up pointers possibly in use. */ + if ( ts != 0 ) + ts = newbuf + ( ts - buf ); + te = newbuf + ( te - buf ); + + /* Copy the new buffer in. */ + memcpy( newbuf, buf, have ); + delete[] buf; + buf = newbuf; + } + + input.read( p, space ); + int len = input.gcount(); + char *pe = p + len; + + /* If we see eof then append the eof var. */ + char *eof = 0; + if ( len == 0 ) { + eof = pe; + execute = false; + } + + %% write exec; + + /* Check if we failed. */ + if ( cs == rlscan_error ) { + /* Machine failed before finding a token. I'm not yet sure if this + * is reachable. */ + id->error(scan_loc()) << "scanner error" << endl; + id->abortCompile( 1 ); + } + + /* Decide if we need to preserve anything. */ + char *preserve = ts; + + /* Now set up the prefix. */ + if ( preserve == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. 
*/ + have = pe - preserve; + memmove( buf, preserve, have ); + unsigned int shiftback = preserve - buf; + if ( ts != 0 ) + ts -= shiftback; + te -= shiftback; + + preserve = buf; + } + } + + delete[] buf; +} diff --git a/ragel/switch.cc b/ragel/switch.cc new file mode 100644 index 00000000..8355cdac --- /dev/null +++ b/ragel/switch.cc @@ -0,0 +1,1036 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "switch.h" +#include "redfsm.h" +#include "gendata.h" + +#include <assert.h> + +std::ostream &Switch::TRANS_GOTO( int off, RedTransAp *trans ) +{ + out << "_trans = " << off << ";\n"; + return out; +} + +void Switch::RANGE_B_SEARCH( RedStateAp *state, Key lower, Key upper, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. 
*/ + int mid = (low + high) >> 1; + RedTransEl *data = state->outRange.data; + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet. */ + bool limitLow = keyOps->eq( data[mid].lowKey, lower ); + bool limitHigh = keyOps->eq( data[mid].highKey, upper ); + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid. */ + out << "if ( " << GET_KEY() << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + RANGE_B_SEARCH( state, lower, keyOps->sub( data[mid].lowKey, 1 ), low, mid-1 ); + out << "} else if ( " << GET_KEY() << " > " << + KEY(data[mid].highKey) << " ) {\n"; + RANGE_B_SEARCH( state, keyOps->add( data[mid].highKey, 1 ), upper, mid+1, high ); + out << "} else {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value ) << "\n"; + out << "}\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. */ + out << "if ( " << GET_KEY() << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + RANGE_B_SEARCH( state, lower, keyOps->sub( data[mid].lowKey, 1 ), low, mid-1 ); + + /* if the higher is the highest in the alphabet then there is no + * sense testing it. */ + if ( limitHigh ) { + out << "} else {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + } + else { + out << "} else if ( " << GET_KEY() << " <= " << + KEY(data[mid].highKey) << " ) {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + + out << "else {\n"; + DEFAULT( state ); + out << "}\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. 
*/ + out << "if ( " << GET_KEY() << " > " << + KEY(data[mid].highKey) << " ) {\n"; + RANGE_B_SEARCH( state, keyOps->add( data[mid].highKey, 1 ), upper, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( limitLow ) { + out << "} else {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + } + else { + out << "} else if ( " << GET_KEY() << " >= " << + KEY(data[mid].lowKey) << " ) {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + + out << "else {\n"; + DEFAULT( state ); + out << "}\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. */ + if ( !limitLow && !limitHigh ) { + out << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_KEY() << " && " << GET_KEY() << " <= " << + KEY(data[mid].highKey) << " ) {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + + out << "else {\n"; + DEFAULT( state ); + out << "}\n"; + } + else if ( limitLow && !limitHigh ) { + out << "if ( " << GET_KEY() << " <= " << + KEY(data[mid].highKey) << " ) {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + + out << "else {\n"; + DEFAULT( state ); + out << "}\n"; + } + else if ( !limitLow && limitHigh ) { + out << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_KEY() << " ) {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + + out << "else {\n"; + DEFAULT( state ); + out << "}\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + out << "{\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + } + } +} +

/*
 * Emit the test on a state's single-key transitions. One single key is
 * written as an if/else, several as a switch. Whenever no single key matches,
 * the emitted code continues with whatever NOT_SINGLE writes. Transition
 * numbers passed to TRANS_GOTO are transBase plus the index of the single.
 */
void Switch::SINGLE_SWITCH( RedStateAp *st )
{
	/* Load up the singles. */
	int numSingles = st->outSingle.length();
	RedTransEl *data = st->outSingle.data;

	if ( numSingles == 1 ) {
		/* If there is a single single key then write it out as an if. */
		out << "\tif ( " << GET_KEY() << " == " <<
				KEY(data[0].lowKey) << " ) {\n\t\t";

		/* Virtual function for writing the target of the transition. */
		TRANS_GOTO(transBase, data[0].value) << "\n";
		out << "\t}\n";

		out << "else {\n";
		NOT_SINGLE( st );
		out << "}\n";
	}
	else if ( numSingles > 1 ) {
		/* Write out single keys in a switch if there is more than one. */
		out << "\tswitch( " << GET_KEY() << " ) {\n";

		/* Write out the single indices. */
		for ( int j = 0; j < numSingles; j++ ) {
			out << CASE( KEY(data[j].lowKey) ) << " {\n";
			TRANS_GOTO(transBase + j, data[j].value) << "\n";
			out << CEND() << "\n}\n";
		}

		out << CodeGen::DEFAULT() << " {\n";
		NOT_SINGLE( st );
		out << CEND() << "\n}\n";

		/* Close off the transition switch. */
		out << "\t}\n";
	}
}

/*
 * Emit the jump for the state's default transition, if it has one. The
 * default transition is numbered after all singles and ranges of the state.
 * Nothing is written when the state has no default transition.
 */
void Switch::DEFAULT( RedStateAp *st )
{
	if ( st->defTrans != 0 ) {
		TRANS_GOTO( transBase + st->outSingle.length() + st->outRange.length(), st->defTrans ) << "\n";
	}
}

/*
 * Emit the code for the case where no single key matched: a search over the
 * state's ranges spanning the whole alphabet (minKey..maxKey) when ranges
 * exist, otherwise the default transition.
 */
void Switch::NOT_SINGLE( RedStateAp *st )
{
	if ( st->outRange.length() > 0 ) {
		RANGE_B_SEARCH( st, keyOps->minKey, keyOps->maxKey,
				0, st->outRange.length() - 1 );
	}
	else {
		DEFAULT( st );
	}
}

/*
 * Emit the switch on the current state that selects the transition to take.
 * transBase is reset to zero and then advanced past each state's singles,
 * ranges and optional default transition, so it always holds the number of
 * the first transition of the state being written. The error state gets an
 * empty case.
 */
void Switch::LOCATE_TRANS()
{
	transBase = 0;

	out <<
		" switch ( " << vCS() << " ) {\n";

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		if ( st == redFsm->errState ) {
			out << CASE( STR( st->id ) ) << " {\n";
			out << CEND() << "\n}\n";
		}
		else {
			/* Label the state. */
			out << CASE( STR( st->id ) ) << " {\n";

			/* Try singles. */
			if ( st->outSingle.length() > 0 ) {
				SINGLE_SWITCH( st );
			}
			else {
				NOT_SINGLE( st );
			}

			out << CEND() << "\n}\n";
		}

		transBase += st->outSingle.length() +
				st->outRange.length() +
				( st->defTrans != 0 ? 1 : 0 );
	}

	out <<
		" }\n"
		"\n";
}

/*
 * Prepare the machine for code generation: order the states, pick default
 * and single transitions, then run the table analysis pass and leave the
 * tables in generate mode. Returns early, before any analysis, when errors
 * have been recorded for the input.
 */
void Switch::genAnalysis()
{
	redFsm->sortByStateId();

	/* Choose default transitions and the single transition. */
	redFsm->chooseDefaultSpan();

	/* Choose the singles. */
	redFsm->moveSelectTransToSingle();

	if ( redFsm->errState != 0 )
		redFsm->getErrorCond();

	/* If any errors have occurred in the input file then don't write anything. */
	if ( red->id->errorCount > 0 )
		return;

	/* Analyze Machine will find the final action reference counts, among other
	 * things. We will use these in reporting the usage of fsm directives in
	 * action code. */
	red->analyzeMachine();

	setKeyType();

	/* Run the analysis pass over the table data. */
	setTableState( TableArray::AnalyzePass );
	tableDataPass();

	/* Switch the tables over to the code gen mode. */
	setTableState( TableArray::GeneratePass );
}


/*
 * Run every table builder once. Called from genAnalysis while the tables are
 * in the analyze state. The actions table is only built for the Loop type.
 */
void Switch::tableDataPass()
{
	if ( type == Loop )
		taActions();

	taKeyOffsets();
	taSingleLens();
	taRangeLens();
	taIndexOffsets();
	taIndicies();

	taTransCondSpacesWi();
	taTransOffsetsWi();
	taTransLengthsWi();

	taTransCondSpaces();
	taTransOffsets();
	taTransLengths();

	taCondTargs();
	taCondActions();

	taToStateActions();
	taFromStateActions();
	taEofActions();
	taEofConds();
	taEofTrans();

	taKeys();
	taCondKeys();

	taNfaTargs();
	taNfaOffsets();
	taNfaPushActions();
	taNfaPopTrans();
}

/*
 * Write the data tables. Tables whose content is optional (actions, to-state,
 * from-state and EOF actions, EOF transitions) are only written when the
 * machine reports having any. The state ids are written last.
 */
void Switch::writeData()
{
	if ( type == Loop ) {
		/* If there are any transition functions then output the array. If there
		 * are none, don't bother emitting an empty array that won't be used. */
		if ( redFsm->anyActions() )
			taActions();
	}

	taKeyOffsets();
	taKeys();
	taSingleLens();
	taRangeLens();
	taIndexOffsets();

	taTransCondSpaces();
	taTransOffsets();
	taTransLengths();

	taCondKeys();
	taCondTargs();
	taCondActions();

	if ( redFsm->anyToStateActions() )
		taToStateActions();

	if ( redFsm->anyFromStateActions() )
		taFromStateActions();

	if ( redFsm->anyEofActions() )
		taEofActions();

	taEofConds();

	if ( redFsm->anyEofTrans() )
		taEofTrans();

	taNfaTargs();
	taNfaOffsets();
	taNfaPushActions();
	taNfaPopTrans();

	STATE_IDS();
}


/* Give the transition key table the alphabet type and its signedness. */
void Switch::setKeyType()
{
	transKeys.setType( ALPH_TYPE(), alphType->size, alphType->isChar );
	transKeys.isSigned = keyOps->isSigned;
}

/* Put every table registered in arrayVector into the given state. */
void Switch::setTableState( TableArray::State state )
{
	for ( ArrayVector::Iter i = arrayVector; i.lte(); i++ ) {
		TableArray *tableArray = *i;
		tableArray->setState( state );
	}
}

/*
 * Per state: offset of its first key in the key table. Each single
 * contributes one key and each range two (low and high).
 */
void Switch::taKeyOffsets()
{
	keyOffsets.start();

	int curKeyOffset = 0;
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		keyOffsets.value( curKeyOffset );
		curKeyOffset += st->outSingle.length() + st->outRange.length() * 2;
	}

	keyOffsets.finish();
}


/* Per state: number of single-key transitions. */
void Switch::taSingleLens()
{
	singleLens.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
		singleLens.value( st->outSingle.length() );

	singleLens.finish();
}


/* Per state: number of range transitions. */
void Switch::taRangeLens()
{
	rangeLens.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
		rangeLens.value( st->outRange.length() );

	rangeLens.finish();
}

/*
 * Per state: offset of its first entry in the index table. A state occupies
 * one entry per single, one per range, and one more if it has a default
 * transition.
 */
void Switch::taIndexOffsets()
{
	indexOffsets.start();

	int curIndOffset = 0;

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		/* Write the index offset. */
		indexOffsets.value( curIndOffset );

		/* Move the index offset ahead. */
		curIndOffset += st->outSingle.length() + st->outRange.length();
		if ( st->defTrans != 0 )
			curIndOffset += 1;
	}

	indexOffsets.finish();
}

/* Per state: the to-state action entry, written by TO_STATE_ACTION. */
void Switch::taToStateActions()
{
	toStateActions.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
		TO_STATE_ACTION(st);

	toStateActions.finish();
}

/* Per state: the from-state action entry, written by FROM_STATE_ACTION. */
void Switch::taFromStateActions()
{
	fromStateActions.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
		FROM_STATE_ACTION(st);

	fromStateActions.finish();
}

/* Per state: the EOF action entry, written by EOF_ACTION. */
void Switch::taEofActions()
{
	eofActions.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
		EOF_ACTION( st );

	eofActions.finish();
}

/*
 * Build the four EOF condition tables: the condition space id per state
 * (-1 when the state has none), the offset and the length of each state's
 * slice of EOF condition keys (both 0 when it has none), and the keys
 * themselves.
 */
void Switch::taEofConds()
{
	/*
	 * EOF Cond Spaces
	 */
	eofCondSpaces.start();
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		if ( st->outCondSpace != 0 )
			eofCondSpaces.value( st->outCondSpace->condSpaceId );
		else
			eofCondSpaces.value( -1 );
	}
	eofCondSpaces.finish();

	/*
	 * EOF Cond Key Indexes
	 */
	eofCondKeyOffs.start();

	int curOffset = 0;
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		long off = 0;
		if ( st->outCondSpace != 0 ) {
			off = curOffset;
			curOffset += st->outCondKeys.length();
		}
		eofCondKeyOffs.value( off );
	}

	eofCondKeyOffs.finish();

	/*
	 * EOF Cond Key Lengths.
	 */
	eofCondKeyLens.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		long len = 0;
		if ( st->outCondSpace != 0 )
			len = st->outCondKeys.length();
		eofCondKeyLens.value( len );
	}

	eofCondKeyLens.finish();

	/*
	 * EOF Cond Keys
	 */
	eofCondKeys.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		if ( st->outCondSpace != 0 ) {
			for ( int c = 0; c < st->outCondKeys.length(); c++ ) {
				CondKey key = st->outCondKeys[c];
				eofCondKeys.value( key.getVal() );
			}
		}
	}

	eofCondKeys.finish();
}

/*
 * Per state: position of its EOF transition, or 0 when it has none. EOF
 * transitions are numbered after all singles, ranges and defaults of every
 * state, and the stored value is that position plus one so that 0 stays
 * free to mean "no EOF transition".
 */
void Switch::taEofTrans()
{
	eofTrans.start();

	/* Need to compute transition positions. */
	int totalTrans = 0;
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		totalTrans += st->outSingle.length();
		totalTrans += st->outRange.length();
		if ( st->defTrans != 0 )
			totalTrans += 1;
	}

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		long trans = 0;
		if ( st->eofTrans != 0 ) {
			trans = totalTrans + 1;
			totalTrans += 1;
		}

		eofTrans.value( trans );
	}

	eofTrans.finish();
}

/*
 * The key table: for each state its single keys, followed by the low and
 * high key of each of its ranges.
 */
void Switch::taKeys()
{
	transKeys.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		/* Loop the singles. */
		for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) {
			transKeys.value( stel->lowKey.getVal() );
		}

		/* Loop the state's transitions. */
		for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
			/* Lower key. */
			transKeys.value( rtel->lowKey.getVal() );

			/* Upper key. */
			transKeys.value( rtel->highKey.getVal() );
		}
	}

	transKeys.finish();
}

/*
 * The index table: for each state the transition ids of its singles, its
 * ranges, and then its default transition if it has one.
 */
void Switch::taIndicies()
{
	indicies.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		/* Walk the singles. */
		for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ )
			indicies.value( stel->value->id );

		/* Walk the ranges. */
		for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ )
			indicies.value( rtel->value->id );

		/* The state's default index goes next. */
		if ( st->defTrans != 0 )
			indicies.value( st->defTrans->id );
	}

	indicies.finish();
}

/*
 * Condition space id of every transition (-1 when it has none), in the
 * order singles, ranges, default for each state, followed by the EOF
 * transitions of all states.
 */
void Switch::taTransCondSpaces()
{
	transCondSpaces.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		/* Walk the singles. */
		for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) {
			RedTransAp *trans = stel->value;
			if ( trans->condSpace != 0 )
				transCondSpaces.value( trans->condSpace->condSpaceId );
			else
				transCondSpaces.value( -1 );
		}

		/* Walk the ranges. */
		for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
			RedTransAp *trans = rtel->value;
			if ( trans->condSpace != 0 )
				transCondSpaces.value( trans->condSpace->condSpaceId );
			else
				transCondSpaces.value( -1 );
		}

		/* The state's default index goes next. */
		if ( st->defTrans != 0 ) {
			RedTransAp *trans = st->defTrans;
			if ( trans->condSpace != 0 )
				transCondSpaces.value( trans->condSpace->condSpaceId );
			else
				transCondSpaces.value( -1 );
		}
	}

	/* Add any eof transitions that have not yet been written out above. */
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		if ( st->eofTrans != 0 ) {
			RedTransAp *trans = st->eofTrans;
			if ( trans->condSpace != 0 )
				transCondSpaces.value( trans->condSpace->condSpaceId );
			else
				transCondSpaces.value( -1 );
		}
	}

	transCondSpaces.finish();
}

/*
 * Offset of every transition's first condition, in the same order as
 * taTransCondSpaces. The offset one past the last condition is stored in
 * errCondOffset.
 */
void Switch::taTransOffsets()
{
	transOffsets.start();

	int curOffset = 0;
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		/* Walk the singles. */
		for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) {
			RedTransAp *trans = stel->value;
			transOffsets.value( curOffset );
			curOffset += trans->numConds();
		}

		/* Walk the ranges. */
		for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
			RedTransAp *trans = rtel->value;
			transOffsets.value( curOffset );
			curOffset += trans->numConds();
		}

		/* The state's default index goes next. */
		if ( st->defTrans != 0 ) {
			RedTransAp *trans = st->defTrans;
			transOffsets.value( curOffset );
			curOffset += trans->numConds();
		}
	}

	/* Add any eof transitions that have not yet been written out above. */
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		if ( st->eofTrans != 0 ) {
			RedTransAp *trans = st->eofTrans;
			transOffsets.value( curOffset );
			curOffset += trans->numConds();
		}
	}

	errCondOffset = curOffset;

	transOffsets.finish();
}

/*
 * Number of conditions of every transition, in the same order as
 * taTransCondSpaces.
 */
void Switch::taTransLengths()
{
	transLengths.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		/* Walk the singles. */
		for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) {
			RedTransAp *trans = stel->value;
			transLengths.value( trans->numConds() );
		}

		/* Walk the ranges. */
		for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
			RedTransAp *trans = rtel->value;
			transLengths.value( trans->numConds() );
		}

		/* The state's default index goes next. */
		if ( st->defTrans != 0 ) {
			RedTransAp *trans = st->defTrans;
			transLengths.value( trans->numConds() );
		}
	}

	/* Add any eof transitions that have not yet been written out above. */
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		if ( st->eofTrans != 0 ) {
			RedTransAp *trans = st->eofTrans;
			transLengths.value( trans->numConds() );
		}
	}

	transLengths.finish();
}

/*
 * Condition space id (-1 when none) for every transition in
 * redFsm->transSet, in the set's iteration order.
 */
void Switch::taTransCondSpacesWi()
{
	transCondSpacesWi.start();

	for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
		/* Cond Space id. */
		if ( trans->condSpace != 0 )
			transCondSpacesWi.value( trans->condSpace->condSpaceId );
		else
			transCondSpacesWi.value( -1 );
	}

	transCondSpacesWi.finish();
}

/*
 * Offset of the first condition of every transition in redFsm->transSet,
 * in the set's iteration order.
 */
void Switch::taTransOffsetsWi()
{
	transOffsetsWi.start();

	int curOffset = 0;
	for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
		transOffsetsWi.value( curOffset );
		curOffset += trans->numConds();
	}

	transOffsetsWi.finish();
}

/*
 * Number of conditions of every transition in redFsm->transSet, in the
 * set's iteration order.
 */
void Switch::taTransLengthsWi()
{
	transLengthsWi.start();

	for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ )
		transLengthsWi.value( trans->numConds() );

	transLengthsWi.finish();
}

/*
 * Condition keys of every transition, in the same order as
 * taTransCondSpaces.
 */
void Switch::taCondKeys()
{
	condKeys.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		/* Walk the singles. */
		for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) {
			RedTransAp *trans = stel->value;
			for ( int c = 0; c < trans->numConds(); c++ ) {
				CondKey key = trans->outCondKey( c );
				condKeys.value( key.getVal() );
			}
		}

		/* Walk the ranges. */
		for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
			RedTransAp *trans = rtel->value;
			for ( int c = 0; c < trans->numConds(); c++ ) {
				CondKey key = trans->outCondKey( c );
				condKeys.value( key.getVal() );
			}
		}

		/* The state's default index goes next. */
		if ( st->defTrans != 0 ) {
			RedTransAp *trans = st->defTrans;
			for ( int c = 0; c < trans->numConds(); c++ ) {
				CondKey key = trans->outCondKey( c );
				condKeys.value( key.getVal() );
			}
		}
	}

	/* Add any eof transitions that have not yet been written out above. */
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		if ( st->eofTrans != 0 ) {
			RedTransAp *trans = st->eofTrans;
			for ( int c = 0; c < trans->numConds(); c++ ) {
				CondKey key = trans->outCondKey( c );
				condKeys.value( key.getVal() );
			}
		}
	}

	condKeys.finish();
}

/*
 * Target state id of every condition of every transition, in the same
 * order as taCondKeys. When the machine has an error condition its target
 * is appended last.
 */
void Switch::taCondTargs()
{
	condTargs.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		/* Walk the singles. */
		for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) {
			RedTransAp *trans = stel->value;
			for ( int c = 0; c < trans->numConds(); c++ ) {
				RedCondPair *cond = trans->outCond( c );
				condTargs.value( cond->targ->id );
			}
		}

		/* Walk the ranges. */
		for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
			RedTransAp *trans = rtel->value;
			for ( int c = 0; c < trans->numConds(); c++ ) {
				RedCondPair *cond = trans->outCond( c );
				condTargs.value( cond->targ->id );
			}
		}

		/* The state's default index goes next. */
		if ( st->defTrans != 0 ) {
			RedTransAp *trans = st->defTrans;
			for ( int c = 0; c < trans->numConds(); c++ ) {
				RedCondPair *cond = trans->outCond( c );
				condTargs.value( cond->targ->id );
			}
		}
	}

	/* Add any eof transitions that have not yet been written out above. */
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		if ( st->eofTrans != 0 ) {
			RedTransAp *trans = st->eofTrans;
			for ( int c = 0; c < trans->numConds(); c++ ) {
				RedCondPair *cond = trans->outCond( c );
				condTargs.value( cond->targ->id );
			}
		}
	}

	if ( redFsm->errCond != 0 ) {
		RedCondPair *cond = &redFsm->errCond->p;
		condTargs.value( cond->targ->id );
	}

	condTargs.finish();
}

/*
 * Action entry of every condition of every transition, written by
 * COND_ACTION in the same order as taCondTargs, including the trailing
 * error condition when the machine has one.
 */
void Switch::taCondActions()
{
	condActions.start();

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		/* Walk the singles. */
		for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) {
			RedTransAp *trans = stel->value;
			for ( int c = 0; c < trans->numConds(); c++ ) {
				RedCondPair *cond = trans->outCond( c );
				COND_ACTION( cond );
			}
		}

		/* Walk the ranges. */
		for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
			RedTransAp *trans = rtel->value;
			for ( int c = 0; c < trans->numConds(); c++ ) {
				RedCondPair *cond = trans->outCond( c );
				COND_ACTION( cond );
			}
		}

		/* The state's default index goes next. */
		if ( st->defTrans != 0 ) {
			RedTransAp *trans = st->defTrans;
			for ( int c = 0; c < trans->numConds(); c++ ) {
				RedCondPair *cond = trans->outCond( c );
				COND_ACTION( cond );
			}
		}
	}

	/* Add any eof transitions that have not yet been written out above. */
	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		if ( st->eofTrans != 0 ) {
			RedTransAp *trans = st->eofTrans;
			for ( int c = 0; c < trans->numConds(); c++ ) {
				RedCondPair *cond = trans->outCond( c );
				COND_ACTION( cond );
			}
		}
	}

	if ( redFsm->errCond != 0 ) {
		RedCondPair *cond = &redFsm->errCond->p;
		COND_ACTION( cond );
	}

	condActions.finish();
}

/*
 * The NFA target table: a filler entry at offset zero, then for each state
 * with NFA targets the number of targets followed by the target state ids.
 */
void Switch::taNfaTargs()
{
	nfaTargs.start();

	/* Offset of zero means no NFA targs, put a filler there. */
	nfaTargs.value( 0 );

	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
		if ( st->nfaTargs != 0 ) {
			nfaTargs.value( st->nfaTargs->length() );
			for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ )
				nfaTargs.value( targ->state->id );
		}
	}

	nfaTargs.finish();
}

/* These need to mirror nfa targs. 
*/ +void Switch::taNfaPushActions() +{ + nfaPushActions.start(); + + nfaPushActions.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaPushActions.value( 0 ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_PUSH_ACTION( targ ); + } + } + + nfaPushActions.finish(); +} + +void Switch::taNfaPopTrans() +{ + nfaPopTrans.start(); + + nfaPopTrans.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + + nfaPopTrans.value( 0 ); + + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_POP_TEST( targ ); + } + } + + nfaPopTrans.finish(); +} + +void Switch::taNfaOffsets() +{ + nfaOffsets.start(); + + /* Offset of zero means no NFA targs, real targs start at 1. */ + long offset = 1; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs == 0 ) { + nfaOffsets.value( 0 ); + } + else { + nfaOffsets.value( offset ); + offset += 1 + st->nfaTargs->length(); + } + } + + nfaOffsets.finish(); +} + + +/* Write out the array of actions. */ +std::ostream &Switch::ACTIONS_ARRAY() +{ + out << "\t0, "; + int totalActions = 1; + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. */ + out << act->key.length() << ", "; + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) { + out << item->value->actionId; + if ( ! (act.last() && item.last()) ) + out << ", "; + + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +void Switch::taActions() +{ + actions.start(); + + /* Put "no-action" at the beginning. 
*/ + actions.value( 0 ); + + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. */ + actions.value( act->key.length() ); + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) + actions.value( item->value->actionId ); + } + + actions.finish(); +} + + + + diff --git a/ragel/switch.h b/ragel/switch.h new file mode 100644 index 00000000..9af753df --- /dev/null +++ b/ragel/switch.h @@ -0,0 +1,106 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _C_SWITCH_H +#define _C_SWITCH_H + +#include <iostream> +#include "codegen.h" +#include "tables.h" + +/* Forwards. 
*/ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; + +class Switch + : public virtual Tables +{ +protected: + enum Type { + Loop = 1, Exp + }; + +public: + Switch( const CodeGenArgs &args, Type type ) + : + Tables( args ), + type(type) + {} + + std::ostream &TRANS_GOTO( int off, RedTransAp *trans ); + void RANGE_B_SEARCH( RedStateAp *state, Key lower, Key upper, int low, int high ); + void SINGLE_SWITCH( RedStateAp *st ); + void DEFAULT( RedStateAp *st ); + void NOT_SINGLE( RedStateAp *st ); + void LOCATE_TRANS(); + +protected: + Type type; + int transBase; + + std::ostream &COND_KEYS_v1(); + std::ostream &COND_SPACES_v1(); + std::ostream &INDICIES(); + std::ostream &INDEX_OFFSETS(); + std::ostream &SINGLE_LENS(); + std::ostream &RANGE_LENS(); + std::ostream &TRANS_TARGS_WI(); + std::ostream &ACTIONS_ARRAY(); + + void taKeyOffsets(); + void taSingleLens(); + void taRangeLens(); + void taIndexOffsets(); + void taIndicies(); + void taTransCondSpacesWi(); + void taTransOffsetsWi(); + void taTransLengthsWi(); + void taTransCondSpaces(); + void taTransOffsets(); + void taTransLengths(); + void taCondTargs(); + void taCondActions(); + void taToStateActions(); + void taFromStateActions(); + void taEofTrans(); + void taEofConds(); + void taEofActions(); + void taKeys(); + void taActions(); + void taCondKeys(); + void taNfaTargs(); + void taNfaOffsets(); + void taNfaPushActions(); + void taNfaPopTrans(); + + void setKeyType(); + + void setTableState( TableArray::State ); + + virtual void writeData(); + virtual void tableDataPass(); + virtual void genAnalysis(); +}; + +#endif diff --git a/ragel/switchbreak.cc b/ragel/switchbreak.cc new file mode 100644 index 00000000..8b162984 --- /dev/null +++ b/ragel/switchbreak.cc @@ -0,0 +1,75 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the 
"Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "switchbreak.h" + +void SwitchBreak::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + std::stringstream success, error; + + out << + " " << ckeys << " = " << OFFSET( ARR_REF( condKeys ), ARR_REF( transOffsets ) + "[" + string(trans) + "]" ) << ";\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( transLengths ) << "[" << trans << "];\n" + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + if ( red->condSpaceList.length() > 0 ) + COND_EXEC( ARR_REF( transCondSpaces ) + "[" + string(trans) + "]" ); + + success << + cond << " += " << CAST( UINT() ) << "(_mid - " << ckeys << ");\n"; + + error << + cond << " = " << errCondOffset << ";\n"; + + out << + " {\n" + " " << INDEX( ARR_TYPE( condKeys ), "_lower" ) << " = " << ckeys << ";\n" + " " << INDEX( ARR_TYPE( condKeys ), "_upper" ) << " = " << ckeys << " + " << klen << " - 1;\n" + " " << INDEX( ARR_TYPE( condKeys ), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" 
+ " if ( _upper < _lower ) {\n" + " " << error.str() << "\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << cpc << " < " << CAST("int") << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << cpc << " > " << CAST( "int" ) << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << success.str() << "\n" + " break;\n" + " }\n" + " }\n" + " }\n" + ; + } + + out << EMIT_LABEL( _match_cond ); +} + diff --git a/ragel/switchbreak.h b/ragel/switchbreak.h new file mode 100644 index 00000000..fdbac68c --- /dev/null +++ b/ragel/switchbreak.h @@ -0,0 +1,70 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAGEL_SWITCHBREAK_H +#define RAGEL_SWITCHBREAK_H + +#include "switch.h" +#include "actloop.h" +#include "actexp.h" + +struct SwitchBreak +: + public Switch, public TabBreak +{ + SwitchBreak( const CodeGenArgs &args, Switch::Type type ) + : + Tables( args ), + Switch( args, type ), + TabBreak( args ) + {} + + void LOCATE_COND(); +}; + +class SwitchBreakLoop + : public SwitchBreak, public ActLoop +{ +public: + SwitchBreakLoop( const CodeGenArgs &args ) + : + Tables( args ), + SwitchBreak( args, Loop ), + ActLoop( args ) + {} +}; + + +class SwitchBreakExp + : public SwitchBreak, public ActExp +{ +public: + SwitchBreakExp( const CodeGenArgs &args ) + : + Tables( args ), + SwitchBreak( args, Exp ), + ActExp( args ) + {} +}; + + +#endif diff --git a/ragel/switchgoto.cc b/ragel/switchgoto.cc new file mode 100644 index 00000000..38488ee2 --- /dev/null +++ b/ragel/switchgoto.cc @@ -0,0 +1,73 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "switchgoto.h" + +void SwitchGoto::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + std::stringstream success, error; + + out << + " " << ckeys << " = " << OFFSET( ARR_REF( condKeys ), ARR_REF( transOffsets ) + "[" + string(trans) + "]" ) << ";\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( transLengths ) << "[" << trans << "];\n" + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + if ( red->condSpaceList.length() > 0 ) + COND_EXEC( ARR_REF( transCondSpaces ) + "[" + string(trans) + "]" ); + + success << + cond << " += " << CAST( UINT() ) << "(_mid - " << ckeys << ");\n"; + + error << + cond << " = " << errCondOffset << ";\n"; + + out << + " {\n" + " " << INDEX( ARR_TYPE( condKeys ), "_lower" ) << " = " << ckeys << ";\n" + " " << INDEX( ARR_TYPE( condKeys ), "_upper" ) << " = " << ckeys << " + " << klen << " - 1;\n" + " " << INDEX( ARR_TYPE( condKeys ), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << error.str() << "\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << cpc << " < " << CAST("int") << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << cpc << " > " << CAST( "int" ) << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << success.str() << "\n" + " break;\n" + " }\n" + " }\n" + " }\n" + ; + } +} + diff --git a/ragel/switchgoto.h b/ragel/switchgoto.h new file mode 100644 index 00000000..d8207325 --- /dev/null +++ b/ragel/switchgoto.h @@ -0,0 +1,70 @@ +/* + * Copyright 2014-2018 Adrian 
Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */

#ifndef RAGEL_SWITCHGOTO_H
#define RAGEL_SWITCHGOTO_H

#include "switch.h"
#include "actloop.h"
#include "actexp.h"

/*
 * Code generator that combines Switch with the goto-driven table back end
 * (TabGoto). It declares LOCATE_COND(), which is defined in switchgoto.cc.
 *
 * Tables is named directly in the initializer list because TabGoto inherits
 * it as a virtual base (see tables.h).
 */
struct SwitchGoto
:
	public Switch, public TabGoto
{
	SwitchGoto( const CodeGenArgs &args, Switch::Type type )
	:
		Tables( args ),
		Switch( args, type ),
		TabGoto( args )
	{}

	/* Emits the condition lookup for the current transition. */
	void LOCATE_COND();
};

/* SwitchGoto constructed with type Loop and mixed with ActLoop. */
class SwitchGotoLoop
	: public SwitchGoto, public ActLoop
{
public:
	SwitchGotoLoop( const CodeGenArgs &args )
	:
		Tables( args ),
		SwitchGoto( args, Loop ),
		ActLoop( args )
	{}
};


/* SwitchGoto constructed with type Exp and mixed with ActExp. */
class SwitchGotoExp
	: public SwitchGoto, public ActExp
{
public:
	SwitchGotoExp( const CodeGenArgs &args )
	:
		Tables( args ),
		SwitchGoto( args, Exp ),
		ActExp( args )
	{}
};


#endif

diff --git a/ragel/switchvar.cc b/ragel/switchvar.cc new file mode 100644 index 00000000..a25722e3 --- /dev/null +++ b/ragel/switchvar.cc @@ -0,0 +1,77 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "switchvar.h" +#include "parsedata.h" +#include "inputdata.h" + +void SwitchVar::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + std::stringstream success, error; + + out << + " " << ckeys << " = " << OFFSET( ARR_REF( condKeys ), ARR_REF( transOffsets ) + "[" + string(trans) + "]" ) << ";\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( transLengths ) << "[" << trans << "];\n" + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + if ( red->condSpaceList.length() > 0 ) + COND_EXEC( ARR_REF( transCondSpaces ) + "[" + string(trans) + "]" ); + + success << + cond << " += " << CAST( UINT() ) << "(_mid - " << ckeys << ");\n"; + + error << + cond << " = " << errCondOffset << ";\n"; + + out << + " {\n" + " " << INDEX( ARR_TYPE( condKeys ), "_lower" ) << " = " << ckeys << ";\n" + " " << INDEX( ARR_TYPE( condKeys ), "_upper" ) << " = " << ckeys << " + " << klen << " - 1;\n" + " " << INDEX( ARR_TYPE( condKeys ), "_mid" ) << ";\n" + " _bsc = 1;\n" + " while ( _bsc == 1 ) {\n" + " if ( _upper < _lower ) {\n" + " " << error.str() << "\n" + " _bsc = 0;\n" + " }\n" + " else {\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << cpc << " < " << CAST("int") << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << cpc << " > " << CAST( "int" ) << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << success.str() << "\n" + " _bsc = 0;\n" + " }\n" + " }\n" + " }\n" + " }\n" + ; + } +} + diff --git a/ragel/switchvar.h b/ragel/switchvar.h new file mode 100644 index 00000000..5ed003df --- /dev/null +++ 
b/ragel/switchvar.h @@ -0,0 +1,72 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAGEL_SWITCHVAR_H +#define RAGEL_BINVAR_H + +#include "switch.h" +#include "actloop.h" +#include "actexp.h" + +struct SwitchVar +: + public Switch, public TabVar +{ + SwitchVar( const CodeGenArgs &args, Switch::Type type ) + : + Tables( args ), + Switch( args, type ), + TabVar( args ) + {} + + void VAR_COND_BIN_SEARCH( Variable &var, TableArray &keys, std::string ok, std::string error ); + + //void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class SwitchVarLoop + : public SwitchVar, public ActLoop +{ +public: + SwitchVarLoop( const CodeGenArgs &args ) + : + Tables( args ), + SwitchVar( args, Loop ), + ActLoop( args ) + {} +}; + +class SwitchVarExp +: + public SwitchVar, public ActExp +{ +public: + SwitchVarExp( const CodeGenArgs &args ) + : + Tables( args ), + SwitchVar( args, Exp ), + ActExp( args ) + {} +}; + +#endif diff --git a/ragel/tabbreak.cc b/ragel/tabbreak.cc new file mode 100644 index 00000000..4c69138a --- /dev/null +++ b/ragel/tabbreak.cc @@ -0,0 +1,378 @@ +/* + * Copyright 2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */

#include "tables.h"
#include "binary.h"
#include "flat.h"

/*
 * Build the text of a break statement: "break", followed by a space and the
 * label only when loopLabels is set.
 */
std::string TabBreak::BREAK( GotoLabel &label )
{
	string ret = "break";
	if ( loopLabels ) {
		ret += " ";
		ret += label;
	}
	return ret;
}

/*
 * Build the text of a continue statement: "continue", followed by a space
 * and the label only when loopLabels is set.
 */
std::string TabBreak::CONTINUE( GotoLabel &label )
{
	string ret = "continue";
	if ( loopLabels ) {
		ret += " ";
		ret += label;
	}
	return ret;
}

/*
 * Build the definition text for a loop label: "<name>::\n" when loopLabels
 * is set and the label has been referenced, otherwise the empty string.
 */
std::string TabBreak::BREAK_LABEL( GotoLabel &label )
{
	if ( loopLabels ) {
		if ( label.isReferenced )
			return std::string(label.name) + "::\n";
	}
	return "";
}

/*
 * Write the jump used after a state change: a break to the _again label,
 * wrapped in an always-true `if`. inFinish is not used here.
 */
void TabBreak::CONTROL_JUMP( ostream &ret, bool inFinish )
{
	ret << "if ( " << TRUE() << " ) break " << _again << ";";
}

/* Write a generated block that assigns gotoDest to vCS() and then jumps. */
void TabBreak::GOTO( ostream &ret, int gotoDest, bool inFinish )
{
	ret << OPEN_GEN_BLOCK() << vCS() << " = " << gotoDest << ";";
	CONTROL_JUMP( ret, inFinish );
	ret << CLOSE_GEN_BLOCK();
}

/*
 * Like GOTO, but the value assigned to vCS() is the host expression
 * rendered from ilItem->children.
 */
void TabBreak::GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish )
{
	ret << OPEN_GEN_BLOCK() << vCS() << " = " << OPEN_HOST_EXPR();
	INLINE_LIST( ret, ilItem->children, 0, inFinish, false );
	ret << CLOSE_HOST_EXPR() << ";";

	CONTROL_JUMP( ret, inFinish );
	ret << CLOSE_GEN_BLOCK();
}

/*
 * Write a call: the optional pre-push host block, then code that stores
 * vCS() at STACK()[TOP()], increments TOP(), assigns callDest to vCS() and
 * jumps. targState is not used here.
 */
void TabBreak::CALL( ostream &ret, int callDest, int targState, bool inFinish )
{
	ret << OPEN_GEN_BLOCK();

	if ( red->prePushExpr != 0 ) {
		ret << OPEN_HOST_BLOCK( red->prePushExpr );
		INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false );
		ret << CLOSE_HOST_BLOCK();
	}

	ret << STACK() << "[" << TOP() << "] = " <<
			vCS() << "; " << TOP() << " += 1;" << vCS() << " = " <<
			callDest << ";";

	CONTROL_JUMP( ret, inFinish );
	ret << CLOSE_GEN_BLOCK();
}

/* Same push-and-assign as CALL, but no control jump is written. */
void TabBreak::NCALL( ostream &ret, int callDest, int targState, bool inFinish )
{
	ret << OPEN_GEN_BLOCK();

	if ( red->prePushExpr != 0 ) {
		ret << OPEN_HOST_BLOCK( red->prePushExpr );
		INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false );
		ret << CLOSE_HOST_BLOCK();
	}

	ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " <<
			TOP() << " += 1;" << vCS() << " = " <<
			callDest << "; " << CLOSE_GEN_BLOCK();
}

/*
 * Like CALL, but the call destination is the host expression rendered from
 * ilItem->children.
 */
void TabBreak::CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish )
{
	ret << OPEN_GEN_BLOCK();

	if ( red->prePushExpr != 0 ) {
		ret << OPEN_HOST_BLOCK( red->prePushExpr );
		INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false );
		ret << CLOSE_HOST_BLOCK();
	}

	ret << STACK() << "[" << TOP() << "] = " <<
			vCS() << "; " << TOP() << " += 1;" << vCS() <<
			" = " << OPEN_HOST_EXPR();
	INLINE_LIST( ret, ilItem->children, targState, inFinish, false );
	ret << CLOSE_HOST_EXPR() << ";";

	CONTROL_JUMP( ret, inFinish );
	ret << CLOSE_GEN_BLOCK();
}

/* Same as CALL_EXPR, but no control jump is written. */
void TabBreak::NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish )
{
	ret << OPEN_GEN_BLOCK();

	if ( red->prePushExpr != 0 ) {
		ret << OPEN_HOST_BLOCK( red->prePushExpr );
		INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false );
		ret << CLOSE_HOST_BLOCK();
	}

	ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << TOP() << " += 1;" << vCS() <<
			" = " << OPEN_HOST_EXPR();
	INLINE_LIST( ret, ilItem->children, targState, inFinish, false );
	ret << CLOSE_HOST_EXPR() << "; " << CLOSE_GEN_BLOCK();
}

/*
 * Write a return: decrement TOP(), reload vCS() from STACK()[TOP()], emit
 * the optional post-pop host block, then jump.
 */
void TabBreak::RET( ostream &ret, bool inFinish )
{
	ret << OPEN_GEN_BLOCK() << TOP() << " -= 1;" << vCS() << " = " << STACK() << "[" << TOP() << "];";

	if ( red->postPopExpr != 0 ) {
		ret << OPEN_HOST_BLOCK( red->postPopExpr );
		INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false );
		ret << CLOSE_HOST_BLOCK();
	}

	CONTROL_JUMP( ret, inFinish );
	ret << CLOSE_GEN_BLOCK();
}

/* Same pop as RET, but no control jump is written. */
void TabBreak::NRET( ostream &ret, bool inFinish )
{
	ret << OPEN_GEN_BLOCK() << TOP() << " -= 1;" << vCS() << " = " << STACK() << "[" << TOP() << "];";

	if ( red->postPopExpr != 0 ) {
		ret << OPEN_HOST_BLOCK( red->postPopExpr );
		INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false );
		ret << CLOSE_HOST_BLOCK();
	}

	ret << CLOSE_GEN_BLOCK();
}

/*
 * Write a break out of the machine: advance P() by one and break to the
 * _resume label. targState and csForced are not used here.
 */
void TabBreak::BREAK( ostream &ret, int targState, bool csForced )
{
	ret <<
		OPEN_GEN_BLOCK() <<
		P() << " += 1; " <<
		"break " << _resume << "; " <<
		CLOSE_GEN_BLOCK();
}

/*
 * Write the deferred form of break: advance P() by one and set the nbreak
 * flag, which writeExec() tests after the regular actions.
 */
void TabBreak::NBREAK( ostream &ret, int targState, bool csForced )
{
	ret <<
		OPEN_GEN_BLOCK() <<
		P() << " += 1; " <<
		nbreak << " = 1;" <<
		CLOSE_GEN_BLOCK();
}

/*
 * Write the execution loop in break/continue form.
 *
 * Emission order: local declarations, the outer `while` (labelled _resume
 * when loop labels are on), NFA push, an optional inner `while` labelled
 * _again, from-state actions, EOF transition selection or LOCATE_TRANS,
 * LOCATE_COND, the target-state assignment, regular actions, the _again
 * label, the EOF/final-state test, to-state actions, the advance-and-continue
 * step, and finally either NFA backtracking or a plain break.
 */
void TabBreak::writeExec()
{
	out <<
		" {\n";

	DECLARE( INT(), ps );
	DECLARE( INT(), cpc );
	DECLARE( INT(), nbreak );
	DECLARE( INT(), klen );
	DECLARE( INDEX( ARR_TYPE( condKeys ) ), ckeys );
	DECLARE( INDEX( ARR_TYPE( eofCondKeys ) ), cekeys );
	DECLARE( UINT(), trans, " = 0" );
	DECLARE( UINT(), cond, " = 0" );
	DECLARE( INDEX( ALPH_TYPE() ), keys );
	DECLARE( INDEX( ARR_TYPE( actions ) ), acts );
	DECLARE( INDEX( ARR_TYPE( indicies ) ), inds );
	DECLARE( UINT(), nacts );
	DECLARE( INT(), have );
	DECLARE( INT(), pop_test );
	DECLARE( INT(), new_recs );
	DECLARE( INT(), alt );
	DECLARE( INT(), ic );

	out << BREAK_LABEL( _resume );

	/* Do we break out on no more input. */
	bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates();
	if ( !noEnd ) {
		if ( eof ) {
			/* Keep looping at p == eof so EOF handling gets a pass. */
			out <<
				" while ( " << P() << " != " << PE() << " || " << P() << " == " << vEOF() << " ) {\n";
		}
		else {
			out <<
				" while ( " << P() << " != " << PE() << " ) {\n";
		}
	}
	else {
		out <<
			" while ( " << TRUE() << " ) {\n";

	}

	NFA_PUSH( vCS() );

	/* Inner loop that CONTROL_JUMP's `break _again` leaves. */
	if ( loopLabels ) {
		out << BREAK_LABEL( _again );
		out << "while ( " << TRUE() << " ) {\n";
	}

	FROM_STATE_ACTIONS();

	if ( !noEnd && eof ) {
		out <<
			"if ( " << P() << " == " << vEOF() << " ) {\n";

		if ( redFsm->anyEofTrans() || redFsm->anyEofActions() ) {
			if ( redFsm->anyEofTrans() ) {
				/* eofTrans entries are stored offset by one; zero means none. */
				out <<
					" if ( " << ARR_REF( eofTrans ) << "[" << vCS() << "] > 0 ) {\n"
					" " << trans << " = " <<
						CAST(UINT()) << ARR_REF( eofTrans ) << "[" << vCS() << "] - 1;\n"
					" }\n";
			}
		}

		out <<
			"}\n"
			"else {\n";
	}

	LOCATE_TRANS();

	if ( !noEnd && eof ) {
		out <<
			"}\n";
	}

	LOCATE_COND();

	if ( redFsm->anyRegCurStateRef() )
		out << " " << ps << " = " << vCS() << ";\n";

	/* Index the cond tables by cond when condition spaces exist, else by trans. */
	string condVar =
		red->condSpaceList.length() != 0 ? string(cond) : string(trans);

	out <<
		" " << vCS() << " = " << CAST(INT()) << ARR_REF( condTargs ) << "[" << condVar << "];\n\n";

	if ( redFsm->anyRegActions() ) {
		out <<
			" if ( " << ARR_REF( condActions ) << "[" << condVar << "] != 0 ) {\n"
			"\n";

		if ( redFsm->anyRegNbreak() )
			out << " " << nbreak << " = 0;\n";

		REG_ACTIONS( condVar );

		/* An action may have set nbreak (see NBREAK); honour it now. */
		if ( redFsm->anyRegNbreak() ) {
			out <<
				" if ( " << nbreak << " == 1 )\n"
				" " << BREAK( _resume ) << ";\n";
		}

		out << "}\n";
	}


	/* Close the inner _again loop opened above. */
	if ( loopLabels ) {
		out << BREAK( _again ) << ";\n}\n";
	}

	out << "\n" << EMIT_LABEL( _again );

	if ( !noEnd && eof ) {
		out <<
			" if ( " << P() << " == " << vEOF() << " ) {\n"
			" if ( " << vCS() << " >= " << FIRST_FINAL_STATE() << " )\n"
			" " << BREAK( _resume ) << ";\n"
			" }\n"
			" else {\n";
	}

	TO_STATE_ACTIONS();

	/* Only advance and continue when not in the error state. */
	if ( redFsm->errState != 0 ) {
		out <<
			" if ( " << vCS() << " != " << redFsm->errState->id << " ) {\n";
	}

	out <<
		" " << P() << " += 1;\n"
		" " << CONTINUE( _resume ) << ";\n";

	if ( redFsm->errState != 0 ) {
		out <<
			" }\n";
	}

	if ( !noEnd && eof ) {
		out <<
			" }\n";
	}

	if ( redFsm->anyNfaStates() ) {
		/* Pop one saved NFA entry and restore p from it; stop when none remain. */
		out <<
			" if ( nfa_len == 0 )\n"
			" " << BREAK ( _resume ) << ";\n"
			"\n"
			" nfa_count += 1;\n"
			" nfa_len -= 1;\n"
			" " << P() << " = nfa_bp[nfa_len].p;\n"
			;

		if ( redFsm->bAnyNfaPops ) {
			NFA_FROM_STATE_ACTION_EXEC();

			NFA_POP_TEST_EXEC();

			out <<
				" if ( " << pop_test << " )\n"
				" " << vCS() << " = nfa_bp[nfa_len].state;\n"
				" else\n"
				" " << vCS() << " = " << ERROR_STATE() << ";\n";
		}
		else {
			out <<
				" " << vCS() << " = nfa_bp[nfa_len].state;\n";

		}

		NFA_POST_POP();
	}
	else {
		out <<
			" " << BREAK( _resume ) << ";\n";
	}

	out <<
		"}\n";

	out << EMIT_LABEL( _out );

	out << " }\n";
}

diff --git a/ragel/tabgoto.cc b/ragel/tabgoto.cc new file mode 100644 index 00000000..7c74ab71 --- /dev/null +++ b/ragel/tabgoto.cc @@ -0,0 +1,330 @@ +/* + * 
Copyright 2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */

#include "tables.h"
#include "binary.h"
#include "flat.h"

/* Write the jump used after a state change: `goto _again;`. inFinish is not used here. */
void TabGoto::CONTROL_JUMP( ostream &ret, bool inFinish )
{
	ret << "goto " << _again << ";";
}

/* Write a generated block that assigns gotoDest to vCS() and then jumps. */
void TabGoto::GOTO( ostream &ret, int gotoDest, bool inFinish )
{
	ret << OPEN_GEN_BLOCK() << vCS() << " = " << gotoDest << ";";
	CONTROL_JUMP( ret, inFinish );
	ret << CLOSE_GEN_BLOCK();
}

/*
 * Like GOTO, but the value assigned to vCS() is the host expression
 * rendered from ilItem->children.
 */
void TabGoto::GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish )
{
	ret << OPEN_GEN_BLOCK() << vCS() << " = " << OPEN_HOST_EXPR();
	INLINE_LIST( ret, ilItem->children, 0, inFinish, false );
	ret << CLOSE_HOST_EXPR() << ";";

	CONTROL_JUMP( ret, inFinish );
	ret << CLOSE_GEN_BLOCK();
}

/*
 * Write a call: the optional pre-push host block, then code that stores
 * vCS() at STACK()[TOP()], increments TOP(), assigns callDest to vCS() and
 * jumps. targState is not used here.
 */
void TabGoto::CALL( ostream &ret, int callDest, int targState, bool inFinish )
{
	ret << OPEN_GEN_BLOCK();

	if ( red->prePushExpr != 0 ) {
		ret << OPEN_HOST_BLOCK( red->prePushExpr );
		INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false );
		ret << CLOSE_HOST_BLOCK();
	}

	ret << STACK() << "[" << TOP() << "] = " <<
			vCS() << "; " << TOP() << " += 1;" << vCS() << " = " <<
			callDest << ";";

	CONTROL_JUMP( ret, inFinish );
	ret << CLOSE_GEN_BLOCK();
}

/* Same push-and-assign as CALL, but no control jump is written. */
void TabGoto::NCALL( ostream &ret, int callDest, int targState, bool inFinish )
{
	ret << OPEN_GEN_BLOCK();

	if ( red->prePushExpr != 0 ) {
		ret << OPEN_HOST_BLOCK( red->prePushExpr );
		INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false );
		ret << CLOSE_HOST_BLOCK();
	}

	ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " <<
			TOP() << " += 1;" << vCS() << " = " <<
			callDest << "; " << CLOSE_GEN_BLOCK();
}

/*
 * Like CALL, but the call destination is the host expression rendered from
 * ilItem->children.
 */
void TabGoto::CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish )
{
	ret << OPEN_GEN_BLOCK();

	if ( red->prePushExpr != 0 ) {
		ret << OPEN_HOST_BLOCK( red->prePushExpr );
		INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false );
		ret << CLOSE_HOST_BLOCK();
	}

	ret << STACK() << "[" << TOP() << "] = " <<
			vCS() << "; " << TOP() << " += 1;" << vCS() <<
			" = " << OPEN_HOST_EXPR();
	INLINE_LIST( ret, ilItem->children, targState, inFinish, false );
	ret << CLOSE_HOST_EXPR() << ";";

	CONTROL_JUMP( ret, inFinish );
	ret << CLOSE_GEN_BLOCK();
}

/* Same as CALL_EXPR, but no control jump is written. */
void TabGoto::NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish )
{
	ret << OPEN_GEN_BLOCK();

	if ( red->prePushExpr != 0 ) {
		ret << OPEN_HOST_BLOCK( red->prePushExpr );
		INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false );
		ret << CLOSE_HOST_BLOCK();
	}

	ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << TOP() << " += 1;" << vCS() <<
			" = " << OPEN_HOST_EXPR();
	INLINE_LIST( ret, ilItem->children, targState, inFinish, false );
	ret << CLOSE_HOST_EXPR() << "; " << CLOSE_GEN_BLOCK();
}

/*
 * Write a return: decrement TOP(), reload vCS() from STACK()[TOP()], emit
 * the optional post-pop host block, then jump.
 */
void TabGoto::RET( ostream &ret, bool inFinish )
{
	ret << OPEN_GEN_BLOCK() << TOP() << " -= 1;" << vCS() << " = " << STACK() << "[" << TOP() << "];";

	if ( red->postPopExpr != 0 ) {
		ret << OPEN_HOST_BLOCK( red->postPopExpr );
		INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false );
		ret << CLOSE_HOST_BLOCK();
	}

	CONTROL_JUMP( ret, inFinish );
	ret << CLOSE_GEN_BLOCK();
}

/* Same pop as RET, but no control jump is written. */
void TabGoto::NRET( ostream &ret, bool inFinish )
{
	ret << OPEN_GEN_BLOCK() << TOP() << " -= 1;" << vCS() << " = " << STACK() << "[" << TOP() << "];";

	if ( red->postPopExpr != 0 ) {
		ret << OPEN_HOST_BLOCK( red->postPopExpr );
		INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false );
		ret << CLOSE_HOST_BLOCK();
	}

	ret << CLOSE_GEN_BLOCK();
}

/*
 * Write a break out of the machine: advance P() by one and `goto _out`.
 * targState and csForced are not used here.
 */
void TabGoto::BREAK( ostream &ret, int targState, bool csForced )
{
	ret <<
		OPEN_GEN_BLOCK() <<
		P() << " += 1; " <<
		"goto " << _out << "; " <<
		CLOSE_GEN_BLOCK();
}

/*
 * Write the deferred form of break: advance P() by one and set the nbreak
 * flag, which writeExec() tests after the regular actions.
 */
void TabGoto::NBREAK( ostream &ret, int targState, bool csForced )
{
	ret <<
		OPEN_GEN_BLOCK() <<
		P() << " += 1; " <<
		nbreak << " = 1;" <<
		CLOSE_GEN_BLOCK();
}

/*
 * Write the execution loop in goto form.
 *
 * Emission order: local declarations, the _resume label with the
 * end-of-input test, NFA push, from-state actions, EOF transition selection
 * or LOCATE_TRANS, LOCATE_COND, the target-state assignment, regular
 * actions, the _again label with the EOF/final-state test, to-state actions,
 * the advance-and-`goto _resume` step, optional NFA backtracking, and the
 * _out label.
 */
void TabGoto::writeExec()
{
	out <<
		" {\n";

	DECLARE( INT(), ps );
	DECLARE( INT(), cpc );
	DECLARE( INT(), nbreak );
	DECLARE( INT(), klen );
	DECLARE( INDEX( ARR_TYPE( condKeys ) ), ckeys );
	DECLARE( INDEX( ARR_TYPE( eofCondKeys ) ), cekeys );
	DECLARE( UINT(), trans, " = 0" );
	DECLARE( UINT(), cond, " = 0" );
	DECLARE( INDEX( ALPH_TYPE() ), keys );
	DECLARE( INDEX( ARR_TYPE( actions ) ), acts );
	DECLARE( INDEX( ARR_TYPE( indicies ) ), inds );
	DECLARE( UINT(), nacts );
	DECLARE( INT(), pop_test );
	DECLARE( INT(), new_recs );
	DECLARE( INT(), alt );
	DECLARE( INT(), ic );

	out << EMIT_LABEL( _resume );

	/* Do we break out on no more input. */
	bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates();
	if ( !noEnd ) {
		if ( eof ) {
			/* Leave at p == pe unless this is also eof, which still needs a pass. */
			out <<
				" if ( " << P() << " == " << PE() << " && " << P() << " != " << vEOF() << " )\n"
				" goto " << _out << ";\n";
		}
		else {
			out <<
				" if ( " << P() << " == " << PE() << " )\n"
				" goto " << _out << ";\n";
		}
	}

	NFA_PUSH( vCS() );

	FROM_STATE_ACTIONS();

	if ( !noEnd && eof ) {
		out <<
			"if ( " << P() << " == " << vEOF() << " ) {\n";

		if ( redFsm->anyEofTrans() || redFsm->anyEofActions() ) {
			if ( redFsm->anyEofTrans() ) {
				/* eofTrans entries are stored offset by one; zero means none. */
				out <<
					" if ( " << ARR_REF( eofTrans ) << "[" << vCS() << "] > 0 ) {\n"
					" " << trans << " = " <<
						CAST(UINT()) << ARR_REF( eofTrans ) << "[" << vCS() << "] - 1;\n"
					" }\n";
			}
		}

		out <<
			"}\n"
			"else {\n";
	}

	LOCATE_TRANS();

	if ( !noEnd && eof ) {
		out <<
			"}\n";
	}

	LOCATE_COND();

	if ( redFsm->anyRegCurStateRef() )
		out << " " << ps << " = " << vCS() << ";\n";

	/* Index the cond tables by cond when condition spaces exist, else by trans. */
	string condVar =
		red->condSpaceList.length() != 0 ? string(cond) : string(trans);

	out <<
		" " << vCS() << " = " << CAST(INT()) << ARR_REF( condTargs ) << "[" << condVar << "];\n\n";

	if ( redFsm->anyRegActions() ) {
		out <<
			" if ( " << ARR_REF( condActions ) << "[" << condVar << "] != 0 ) {\n"
			"\n";

		if ( redFsm->anyRegNbreak() )
			out << " " << nbreak << " = 0;\n";

		REG_ACTIONS( condVar );

		/* An action may have set nbreak (see NBREAK); honour it now. */
		if ( redFsm->anyRegNbreak() ) {
			out <<
				" if ( " << nbreak << " == 1 )\n"
				" goto " << _out << ";\n";
		}

		out << "}\n";
	}

	out << "\n" << EMIT_LABEL( _again );

	if ( !noEnd && eof ) {
		out <<
			" if ( " << P() << " == " << vEOF() << " ) {\n"
			" if ( " << vCS() << " >= " << FIRST_FINAL_STATE() << " )\n"
			" goto " << _out << ";\n"
			" }\n"
			" else {\n";
	}

	TO_STATE_ACTIONS();

	/* Only advance and resume when not in the error state. */
	if ( redFsm->errState != 0 ) {
		out <<
			" if ( " << vCS() << " != " << redFsm->errState->id << " ) {\n";
	}

	out <<
		" " << P() << " += 1;\n"
		" goto " << _resume << ";\n";

	if ( redFsm->errState != 0 ) {
		out <<
			" }\n";
	}

	if ( !noEnd && eof ) {
		out <<
			" }\n";
	}

	if ( redFsm->anyNfaStates() ) {
		/* Pop one saved NFA entry and restore p from it; stop when none remain. */
		out <<
			" if ( nfa_len == 0 )\n"
			" goto " << _out << ";\n"
			"\n"
			" nfa_count += 1;\n"
			" nfa_len -= 1;\n"
			" " << P() << " = nfa_bp[nfa_len].p;\n"
			;

		if ( redFsm->bAnyNfaPops ) {
			NFA_FROM_STATE_ACTION_EXEC();

			NFA_POP_TEST_EXEC();

			out <<
				" if ( " << pop_test << " )\n"
				" " << vCS() << " = nfa_bp[nfa_len].state;\n"
				" else\n"
				" " << vCS() << " = " << ERROR_STATE() << ";\n";
		}
		else {
			out <<
				" " << vCS() << " = nfa_bp[nfa_len].state;\n";

		}

		NFA_POST_POP();

		out << "goto " << _resume << ";\n";
	}

	out << EMIT_LABEL( _out );

	out << " }\n";
}

diff --git a/ragel/tables.cc b/ragel/tables.cc new file mode 100644 index 00000000..40edd93e --- /dev/null +++ b/ragel/tables.cc @@ -0,0 +1,81 @@ +/* + * Copyright 2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this 
software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "tables.h" + +void Tables::CURS( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_EXPR() << ps << CLOSE_GEN_EXPR(); +} + +void Tables::TARGS( ostream &ret, bool inFinish, int targState ) +{ + ret << OPEN_GEN_EXPR() << vCS() << CLOSE_GEN_EXPR(); +} + +void Tables::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << nextDest << ";" << CLOSE_GEN_BLOCK(); +} + +void Tables::NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << "" << vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";" << CLOSE_GEN_BLOCK(); +} + +void Tables::EOF_TRANS() +{ + out << + "" << trans << " = " << CAST(UINT()) << ARR_REF( eofTrans ) << "[" << vCS() << "] - 1;\n"; + + if ( red->condSpaceList.length() > 0 ) { + out << + "" << cond << " = " << CAST(UINT()) << ARR_REF( transOffsets ) << "[" << trans << "];\n"; + } +} + +void 
Tables::COND_EXEC( std::string expr ) +{ + out << + " switch ( " << expr << " ) {\n" + "\n"; + + for ( CondSpaceList::Iter csi = red->condSpaceList; csi.lte(); csi++ ) { + GenCondSpace *condSpace = csi; + out << " " << CASE( STR( condSpace->condSpaceId ) ) << " {\n"; + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = (1 << csi.pos()); + out << " ) " << cpc << " += " << condValOffset << ";\n"; + } + + out << + " " << CEND() << "\n}\n"; + } + + out << + " }\n"; +} + diff --git a/ragel/tables.h b/ragel/tables.h new file mode 100644 index 00000000..5799aacb --- /dev/null +++ b/ragel/tables.h @@ -0,0 +1,265 @@ +/* + * Copyright 2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _TABLES_H +#define _TABLES_H + +#include <iostream> +#include "codegen.h" + +struct Tables +: + public CodeGen +{ + Tables( const CodeGenArgs &args ) + : + CodeGen( args ), + + pa( "_pa" ), + klen( "_klen" ), + ckeys( "_ckeys" ), + cekeys( "_cekeys" ), + trans( "_trans" ), + cond( "_cond" ), + keys( "_keys" ), + acts( "_acts" ), + nacts( "_nacts" ), + inds( "_inds" ), + + cont( "_cont" ), + nfa_repeat( "_nfa_repeat" ), + nfa_test( "_nfa_test" ), + ps( "_ps" ), + nbreak( "_nbreak" ), + have( "__have" ), + ic( "_ic" ), + + _out("_out"), + _pop("_pop"), + _test_eof( "_test_eof" ), + _resume( "_resume" ), + _match_cond( "_match_cond" ), + _again( "_again" ), + _match( "_match" ), + _eof_goto( "_eof_goto" ), + + actions( "actions", *this ), + transKeys( "trans_keys", *this ), + charClass( "char_class", *this ), + flatIndexOffset( "index_offsets", *this ), + indicies( "indicies", *this ), + indexDefaults( "index_defaults", *this ), + transCondSpaces( "trans_cond_spaces", *this ), + transOffsets( "trans_offsets", *this ), + condTargs( "cond_targs", *this ), + condActions( "cond_actions", *this ), + toStateActions( "to_state_actions", *this ), + fromStateActions( "from_state_actions", *this ), + eofCondSpaces( "eof_cond_spaces", *this ), + eofCondKeyOffs( "eof_cond_key_offs", *this ), + eofCondKeyLens( "eof_cond_key_lens", *this ), + eofCondKeys( "eof_cond_keys", *this ), + eofActions( "eof_actions", *this ), + eofTrans( "eof_trans", *this ), + + keyOffsets( "key_offsets", *this ), + singleLens( "single_lengths", *this ), + rangeLens( "range_lengths", *this ), + indexOffsets( "index_offsets", *this ), + transCondSpacesWi( "trans_cond_spaces_wi", *this ), + transOffsetsWi( "trans_offsets_wi", *this ), + transLengthsWi( "trans_lengths_wi", *this ), + transLengths( "trans_lengths", *this ), + condKeys( "cond_keys", *this ) + {} + + Variable pa; + Variable klen; + Variable ckeys; + Variable cekeys; + Variable trans; + Variable cond; + Variable keys; + Variable 
acts; + Variable nacts; + Variable inds; + Variable cont; + Variable nfa_repeat; + Variable nfa_test; + Variable ps; + Variable nbreak; + Variable have; + Variable ic; + + GotoLabel _out; + GotoLabel _pop; + GotoLabel _test_eof; + GotoLabel _resume; + GotoLabel _match_cond; + GotoLabel _again; + GotoLabel _match; + GotoLabel _eof_goto; + + TableArray actions; + TableArray transKeys; + TableArray charClass; + TableArray flatIndexOffset; + TableArray indicies; + TableArray indexDefaults; + TableArray transCondSpaces; + TableArray transOffsets; + TableArray condTargs; + TableArray condActions; + TableArray toStateActions; + TableArray fromStateActions; + TableArray eofCondSpaces; + TableArray eofCondKeyOffs; + TableArray eofCondKeyLens; + TableArray eofCondKeys; + TableArray eofActions; + TableArray eofTrans; + + TableArray keyOffsets; + TableArray singleLens; + TableArray rangeLens; + TableArray indexOffsets; + TableArray transCondSpacesWi; + TableArray transOffsetsWi; + TableArray transLengthsWi; + TableArray transLengths; + TableArray condKeys; + + int errCondOffset; + + virtual void TO_STATE_ACTION( RedStateAp *state ) = 0; + virtual void FROM_STATE_ACTION( RedStateAp *state ) = 0; + virtual void EOF_ACTION( RedStateAp *state ) = 0; + virtual void COND_ACTION( RedCondPair *cond ) = 0; + + virtual void NFA_PUSH_ACTION( RedNfaTarg *targ ) = 0; + virtual void NFA_POP_TEST( RedNfaTarg *targ ) = 0; + virtual void NFA_FROM_STATE_ACTION_EXEC() = 0; + + virtual void FROM_STATE_ACTIONS() = 0; + virtual void REG_ACTIONS( std::string cond ) = 0; + virtual void TO_STATE_ACTIONS() = 0; + virtual void EOF_ACTIONS() = 0; + + void CURS( ostream &ret, bool inFinish ); + void TARGS( ostream &ret, bool inFinish, int targState ); + void NEXT( ostream &ret, int nextDest, bool inFinish ); + void NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void EOF_TRANS(); + void COND_EXEC( std::string expr ); +}; + +struct TabGoto +: + public virtual Tables +{ + TabGoto( const 
CodeGenArgs &args ) + : + Tables( args ) + {} + + void CONTROL_JUMP( ostream &ret, bool inFinish ); + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NCALL( ostream &ret, int callDest, int targState, bool inFinish ); + void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void RET( ostream &ret, bool inFinish ); + void NRET( ostream &ret, bool inFinish ); + void BREAK( ostream &ret, int targState, bool csForced ); + void NBREAK( ostream &ret, int targState, bool csForced ); + + void NFA_POP() {} + + void writeExec(); +}; + +struct TabBreak +: + public virtual Tables +{ + TabBreak( const CodeGenArgs &args ) + : + Tables( args ), + loopLabels( args.loopLabels ) + {} + + void CONTROL_JUMP( ostream &ret, bool inFinish ); + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NCALL( ostream &ret, int callDest, int targState, bool inFinish ); + void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void RET( ostream &ret, bool inFinish ); + void NRET( ostream &ret, bool inFinish ); + void BREAK( ostream &ret, int targState, bool csForced ); + void NBREAK( ostream &ret, int targState, bool csForced ); + + void NFA_POP() {} + + void writeExec(); + + bool loopLabels; + std::string BREAK( GotoLabel &label ); + std::string CONTINUE( GotoLabel &label ); + std::string BREAK_LABEL( GotoLabel &label ); +}; + +struct TabVar +: + public virtual Tables +{ + TabVar( const CodeGenArgs &args ) + : + Tables( args ) + {} + + void GOTO( 
ostream &ret, int gotoDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NCALL( ostream &ret, int callDest, int targState, bool inFinish ); + void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void RET( ostream &ret, bool inFinish ); + void NRET( ostream &ret, bool inFinish ); + void BREAK( ostream &ret, int targState, bool csForced ); + void NBREAK( ostream &ret, int targState, bool csForced ); + + void NFA_POP() {} + + std::string BREAK( GotoLabel &label ); + std::string CONTINUE( GotoLabel &label ); + std::string BREAK_LABEL( GotoLabel &label ); + + void writeExec(); +}; + + +#endif diff --git a/ragel/tabvar.cc b/ragel/tabvar.cc new file mode 100644 index 00000000..12f7fdf5 --- /dev/null +++ b/ragel/tabvar.cc @@ -0,0 +1,332 @@ +/* + * Copyright 2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "tables.h" +#include "flatvar.h" +#include "binvar.h" + +std::string TabVar::BREAK( GotoLabel &label ) +{ + return "{ _cont = 0; _again = 0; }"; +} + +std::string TabVar::CONTINUE( GotoLabel &label ) +{ + return "{ _cont = 0; _again = 1; }"; +} + +std::string TabVar::BREAK_LABEL( GotoLabel &label ) +{ + return ""; +} + +void TabVar::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << gotoDest << ";" << CLOSE_GEN_BLOCK(); +} + +void TabVar::GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << OPEN_HOST_EXPR( "-", 1 ); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";" << CLOSE_GEN_BLOCK(); +} + +void TabVar::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + red->id->error() << "cannot use fcall in -B mode" << std::endl; + red->id->abortCompile( 1 ); +} + +void TabVar::NCALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << + vCS() << "; " << TOP() << " += 1;" << vCS() << " = " << + callDest << ";" << CLOSE_GEN_BLOCK(); +} + +void TabVar::CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + red->id->error() << "cannot use fcall in -B mode" << std::endl; + red->id->abortCompile( 1 ); +} + +void TabVar::NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if 
( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << + vCS() << "; " << TOP() << " += 1;" << vCS() << + " = " << OPEN_HOST_EXPR( "-", 1 ); + INLINE_LIST( ret, ilItem->children, targState, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";" << CLOSE_GEN_BLOCK(); +} + +void TabVar::RET( ostream &ret, bool inFinish ) +{ + red->id->error() << "cannot use fret in -B mode" << std::endl; + red->id->abortCompile( 1 ); +} + +void TabVar::NRET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << "-= 1;" << vCS() << " = " << + STACK() << "[" << TOP() << "]; "; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << CLOSE_GEN_BLOCK(); +} + +void TabVar::BREAK( ostream &ret, int targState, bool csForced ) +{ + red->id->error() << "cannot use fbreak in -B mode" << std::endl; + red->id->abortCompile( 1 ); +} + +void TabVar::NBREAK( ostream &ret, int targState, bool csForced ) +{ + ret << + OPEN_GEN_BLOCK() << + P() << "+= 1;\n" << + nbreak << " = 1;" << + CLOSE_GEN_BLOCK(); +} + +void TabVar::writeExec() +{ + out << + "{\n"; + + DECLARE( INT(), ps ); + DECLARE( INT(), cpc ); + DECLARE( INT(), nbreak ); + DECLARE( INT(), klen ); + DECLARE( INDEX( ARR_TYPE( condKeys ) ), ckeys ); + DECLARE( INDEX( ARR_TYPE( eofCondKeys ) ), cekeys ); + DECLARE( UINT(), trans, " = 0" ); + DECLARE( UINT(), cond, " = 0" ); + DECLARE( INDEX( ALPH_TYPE() ), keys ); + DECLARE( INDEX( ARR_TYPE( actions ) ), acts ); + DECLARE( INDEX( ARR_TYPE( indicies ) ), inds ); + DECLARE( UINT(), nacts ); + DECLARE( INT(), have ); + DECLARE( INT(), pop_test ); + DECLARE( INT(), new_recs ); + DECLARE( INT(), alt ); + DECLARE( INT(), ic ); + + out << UINT() << " _have = 0;\n"; + out << UINT() << " 
_cont = 1;\n"; + out << UINT() << " _again = 1;\n"; + out << UINT() << " _bsc = 1;\n"; + + out << BREAK_LABEL( _resume ); + + /* Do we break out on no more input. */ + bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates(); + if ( !noEnd ) { + if ( eof ) { + out << + " while ( _again == 1 && ( " << P() << " != " << PE() << " || " << P() << " == " << vEOF() << " ) ) {\n"; + } + else { + out << + " while ( _again == 1 && " << P() << " != " << PE() << " ) {\n"; + } + } + else { + out << + " while ( _again == 1 ) {\n"; + + } + + out << "_cont = 1;\n"; + out << "_again = 1;\n"; + + NFA_PUSH( vCS() ); + + FROM_STATE_ACTIONS(); + + if ( !noEnd && eof ) { + out << + "if ( " << P() << " == " << vEOF() << " ) {\n"; + + if ( redFsm->anyEofTrans() || redFsm->anyEofActions() ) { + if ( redFsm->anyEofTrans() ) { + out << + " if ( " << ARR_REF( eofTrans ) << "[" << vCS() << "] > 0 ) {\n" + " " << trans << " = " << + CAST(UINT()) << ARR_REF( eofTrans ) << "[" << vCS() << "] - 1;\n" + " }\n"; + } + } + + out << + "}\n" + "else {\n"; + } + + LOCATE_TRANS(); + + if ( !noEnd && eof ) { + out << + "}\n"; + } + + LOCATE_COND(); + + if ( redFsm->anyRegCurStateRef() ) + out << " " << ps << " = " << vCS() << ";\n"; + + string condVar = + red->condSpaceList.length() != 0 ? 
string(cond) : string(trans); + + out << + " " << vCS() << " = " << CAST(INT()) << ARR_REF( condTargs ) << "[" << condVar << "];\n\n"; + + if ( redFsm->anyRegActions() ) { + out << + " if ( " << ARR_REF( condActions ) << "[" << condVar << "] != 0 ) {\n" + "\n"; + + if ( redFsm->anyRegNbreak() ) + out << " " << nbreak << " = 0;\n"; + + REG_ACTIONS( condVar ); + + if ( redFsm->anyRegNbreak() ) { + out << + " if ( " << nbreak << " == 1 )\n" + " " << BREAK( _resume ) << "\n"; + } + + out << "}\n"; + } + + out << "if ( _cont == 1 ) {\n"; + + out << "\n" << EMIT_LABEL( _again ); + + if ( !noEnd && eof ) { + out << + " if ( " << P() << " == " << vEOF() << " ) {\n" + " if ( " << vCS() << " >= " << FIRST_FINAL_STATE() << " )\n" + " " << BREAK( _resume ) << "\n" + " }\n" + " else {\n"; + } + + TO_STATE_ACTIONS(); + + if ( redFsm->errState != 0 ) { + out << + " if ( " << vCS() << " != " << redFsm->errState->id << " ) {\n"; + } + + out << + " " << P() << " += 1;\n" + " " << CONTINUE( _resume ) << "\n"; + + if ( redFsm->errState != 0 ) { + out << + " }\n"; + } + + if ( !noEnd && eof ) { + out << + " }\n"; + } + + out << "if ( _cont == 1 ) {\n"; + + if ( redFsm->anyNfaStates() ) { + out << + " if ( nfa_len == 0 )\n" + " " << BREAK ( _resume ) << "\n" + "\n"; + + out << "if ( _cont == 1 ) {\n"; + + out << + " nfa_count += 1;\n" + " nfa_len -= 1;\n" + " " << P() << " = nfa_bp[nfa_len].p;\n" + ; + + if ( redFsm->bAnyNfaPops ) { + NFA_FROM_STATE_ACTION_EXEC(); + + NFA_POP_TEST_EXEC(); + + out << + " if ( " << pop_test << " )\n" + " " << vCS() << " = nfa_bp[nfa_len].state;\n" + " else\n" + " " << vCS() << " = " << ERROR_STATE() << ";\n"; + } + else { + out << + " " << vCS() << " = nfa_bp[nfa_len].state;\n"; + + } + + NFA_POST_POP(); + + /* cont */ + out << "}\n"; + } + else { + out << + " " << BREAK( _resume ) << "\n"; + } + + /* cont */ + out << "}}\n"; + + /* P loop. */ + out << "}\n"; + + out << EMIT_LABEL( _out ); + + /* Variable decl. 
*/ + out << "}\n"; +} + diff --git a/ragel/version.h.cmake.in b/ragel/version.h.cmake.in new file mode 100644 index 00000000..3e4c310f --- /dev/null +++ b/ragel/version.h.cmake.in @@ -0,0 +1,9 @@ +/* version.h Generated from version.h.cmake.in by cmake */ + +#ifndef _COLM_VERSION_H +#define _COLM_VERSION_H + +#cmakedefine VERSION "@VERSION@" +#cmakedefine PUBDATE "@PUBDATE@" + +#endif /* _COLM_VERSION_H */ diff --git a/ragel/xml.cc b/ragel/xml.cc new file mode 100644 index 00000000..861bb89f --- /dev/null +++ b/ragel/xml.cc @@ -0,0 +1,786 @@ +/* + * Copyright 2005-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* + * XML Output not included in 7.0 (yet -- possibly) + */ + +#include "ragel.h" +#include "xml.h" +#include "parsedata.h" +#include "fsmgraph.h" +#include "gendata.h" +#include "inputdata.h" +#include <string.h> +#include "version.h" + +using std::endl; + +void InputData::processXML() +{ + /* Compiles machines. */ + prepareAllMachines(); + + if ( errorCount > 0 ) + abortCompile( 1 ); + + createOutputStream(); + + if ( errorCount > 0 ) + abortCompile( 1 ); + + /* + * From this point on we should not be reporting any errors. + */ + + openOutput(); + writeXML( *outStream ); + closeOutput(); +} + +XMLCodeGen::XMLCodeGen( std::string fsmName, int machineId, FsmGbl *id, PdBase *pd, FsmAp *fsm, std::ostream &out ) +: + RedBase( id, pd, fsm, fsmName, machineId ), + out(out) +{ +} + +void XMLCodeGen::writeActionList() +{ + /* Determine which actions to write. */ + int nextActionId = 0; + for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { + if ( act->numRefs() > 0 || act->numCondRefs > 0 ) + act->actionId = nextActionId++; + } + + /* Write the list. */ + out << " <action_list length=\"" << nextActionId << "\">\n"; + for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { + if ( act->actionId >= 0 ) + writeAction( act ); + } + out << " </action_list>\n"; +} + +void XMLCodeGen::writeActionTableList() +{ + /* Must first order the action tables based on their id. */ + int numTables = nextActionTableId; + RedActionTable **tables = new RedActionTable*[numTables]; + for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ ) + tables[at->id] = at; + + out << " <action_table_list length=\"" << numTables << "\">\n"; + for ( int t = 0; t < numTables; t++ ) { + out << " <action_table id=\"" << t << "\" length=\"" << + tables[t]->key.length() << "\">"; + for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) { + out << atel->value->actionId; + if ( ! 
atel.last() ) + out << " "; + } + out << "</action_table>\n"; + } + out << " </action_table_list>\n"; + + delete[] tables; +} + +void XMLCodeGen::writeKey( Key key ) +{ + if ( keyOps->isSigned ) + out << key.getVal(); + else + out << (unsigned long) key.getVal(); +} + +void XMLCodeGen::writeTrans( Key lowKey, Key highKey, TransAp *trans ) +{ + /* Write the transition. */ + out << " <t>"; + writeKey( lowKey ); + out << " "; + writeKey( highKey ); + + if ( trans->plain() ) { + /* First reduce the action. */ + RedActionTable *actionTable = 0; + if ( trans->tdap()->actionTable.length() > 0 ) + actionTable = actionTableMap.find( trans->tdap()->actionTable ); + + if ( trans->tdap()->toState != 0 ) + out << " " << trans->tdap()->toState->alg.stateNum; + else + out << " x"; + + if ( actionTable != 0 ) + out << " " << actionTable->id; + else + out << " x"; + } + else { + for ( CondList::Iter ctel = trans->tcap()->condList; ctel.lte(); ctel++ ) { + out << "<c>"; + out << trans->tcap()->condSpace->condSpaceId; + + /* First reduce the action. */ + RedActionTable *actionTable = 0; + if ( ctel->actionTable.length() > 0 ) + actionTable = actionTableMap.find( ctel->actionTable ); + + if ( ctel->toState != 0 ) + out << " " << ctel->toState->alg.stateNum; + else + out << " x"; + + if ( actionTable != 0 ) + out << " " << actionTable->id; + else + out << " x"; + + out << "</c>"; + } + } + + out << "</t>\n"; +} + +void XMLCodeGen::writeTransList( StateAp *state ) +{ + TransListVect outList; + + out << " <trans_list length=\"" << state->outList.length() << "\">\n"; + + /* If there are no ranges the task is simple. */ + if ( state->outList.length() > 0 ) { + /* Loop each source range. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* Reduce the transition. If it reduced to anything then add it. 
*/ + appendTrans( outList, trans->lowKey, trans->highKey, trans ); + } + } + + for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ ) + writeTrans( tvi->lowKey, tvi->highKey, tvi->value ); + out << " </trans_list>\n"; +} + +void XMLCodeGen::writeEofTrans( StateAp *state ) +{ + RedActionTable *eofActions = 0; + if ( state->eofActionTable.length() > 0 ) + eofActions = actionTableMap.find( state->eofActionTable ); + + /* The <eof_t> is used when there is an eof target, otherwise the eof + * action goes into state actions. */ + if ( state->eofTarget != 0 ) { + out << " <eof_t>" << state->eofTarget->alg.stateNum; + + if ( eofActions != 0 ) + out << " " << eofActions->id; + else + out << " x"; + + out << "</eof_t>" << endl; + } +} + +void XMLCodeGen::writeText( InlineItem *item ) +{ + if ( item->prev == 0 || item->prev->type != InlineItem::Text ) + out << "<text>"; + xmlEscapeHost( out, item->data.c_str(), item->data.size() ); + if ( item->next == 0 || item->next->type != InlineItem::Text ) + out << "</text>"; +} + +void XMLCodeGen::writeGoto( InlineItem *item ) +{ + if ( pd->generatingSectionSubset ) + out << "<goto>-1</goto>"; + else { + EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id ); + out << "<goto>" << targ->value->alg.stateNum << "</goto>"; + } +} + +void XMLCodeGen::writeCall( InlineItem *item ) +{ + if ( pd->generatingSectionSubset ) + out << "<call>-1</call>"; + else { + EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id ); + out << "<call>" << targ->value->alg.stateNum << "</call>"; + } +} + +void XMLCodeGen::writeNext( InlineItem *item ) +{ + if ( pd->generatingSectionSubset ) + out << "<next>-1</next>"; + else { + EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id ); + out << "<next>" << targ->value->alg.stateNum << "</next>"; + } +} + +void XMLCodeGen::writeGotoExpr( InlineItem *item ) +{ + out << "<goto_expr>"; + writeInlineList( item->children ); + out << "</goto_expr>"; +} + +void XMLCodeGen::writeCallExpr( 
InlineItem *item ) +{ + out << "<call_expr>"; + writeInlineList( item->children ); + out << "</call_expr>"; +} + +void XMLCodeGen::writeNextExpr( InlineItem *item ) +{ + out << "<next_expr>"; + writeInlineList( item->children ); + out << "</next_expr>"; +} + +void XMLCodeGen::writeEntry( InlineItem *item ) +{ + if ( pd->generatingSectionSubset ) + out << "<entry>-1</entry>"; + else { + EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id ); + out << "<entry>" << targ->value->alg.stateNum << "</entry>"; + } +} + +void XMLCodeGen::writeActionExec( InlineItem *item ) +{ + out << "<exec>"; + writeInlineList( item->children ); + out << "</exec>"; +} + +void XMLCodeGen::writeLmOnLast( InlineItem *item ) +{ + out << "<set_tokend>1</set_tokend>"; + + if ( item->longestMatchPart->action != 0 ) { + out << "<sub_action>"; + writeInlineList( item->longestMatchPart->action->inlineList ); + out << "</sub_action>"; + } +} + +void XMLCodeGen::writeLmOnNext( InlineItem *item ) +{ + out << "<set_tokend>0</set_tokend>"; + out << "<hold></hold>"; + + if ( item->longestMatchPart->action != 0 ) { + out << "<sub_action>"; + writeInlineList( item->longestMatchPart->action->inlineList ); + out << "</sub_action>"; + } +} + +void XMLCodeGen::writeLmOnLagBehind( InlineItem *item ) +{ + out << "<exec><get_tokend></get_tokend></exec>"; + + if ( item->longestMatchPart->action != 0 ) { + out << "<sub_action>"; + writeInlineList( item->longestMatchPart->action->inlineList ); + out << "</sub_action>"; + } +} + +void XMLCodeGen::writeLmSwitch( InlineItem *item ) +{ + LongestMatch *longestMatch = item->longestMatch; + out << "<lm_switch>\n"; + + /* We can't put the <exec> here because we may need to handle the error + * case and in that case p should not be changed. Instead use a default + * label in the switch to adjust p when user actions are not set. An id of + * -1 indicates the default. 
*/ + + if ( longestMatch->lmSwitchHandlesError ) { + /* If the switch handles error then we should have also forced the + * error state. */ + assert( fsm->errState != 0 ); + + out << " <sub_action id=\"0\">"; + out << "<goto>" << fsm->errState->alg.stateNum << "</goto>"; + out << "</sub_action>\n"; + } + + bool needDefault = false; + for ( LmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) { + if ( lmi->inLmSelect ) { + if ( lmi->action == 0 ) + needDefault = true; + else { + /* Open the action. Write it with the context that sets up _p + * when doing control flow changes from inside the machine. */ + out << " <sub_action id=\"" << lmi->longestMatchId << "\">"; + out << "<exec><get_tokend></get_tokend></exec>"; + writeInlineList( lmi->action->inlineList ); + out << "</sub_action>\n"; + } + } + } + + if ( needDefault ) { + out << " <sub_action id=\"-1\"><exec><get_tokend>" + "</get_tokend></exec></sub_action>\n"; + } + + out << " </lm_switch>"; +} + +void XMLCodeGen::writeInlineList( InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Text: + writeText( item ); + break; + case InlineItem::Goto: + writeGoto( item ); + break; + case InlineItem::GotoExpr: + writeGotoExpr( item ); + break; + case InlineItem::Call: + writeCall( item ); + break; + case InlineItem::CallExpr: + writeCallExpr( item ); + break; + case InlineItem::Next: + writeNext( item ); + break; + case InlineItem::NextExpr: + writeNextExpr( item ); + break; + case InlineItem::Break: + out << "<break></break>"; + break; + case InlineItem::Ret: + out << "<ret></ret>"; + break; + case InlineItem::PChar: + out << "<pchar></pchar>"; + break; + case InlineItem::Char: + out << "<char></char>"; + break; + case InlineItem::Curs: + out << "<curs></curs>"; + break; + case InlineItem::Targs: + out << "<targs></targs>"; + break; + case InlineItem::Entry: + writeEntry( item ); + break; + + case 
InlineItem::Hold: + out << "<hold></hold>"; + break; + case InlineItem::Exec: + writeActionExec( item ); + break; + + case InlineItem::LmSetActId: + out << "<set_act>" << + item->longestMatchPart->longestMatchId << + "</set_act>"; + break; + case InlineItem::LmSetTokEnd: + out << "<set_tokend>1</set_tokend>"; + break; + + case InlineItem::LmOnLast: + writeLmOnLast( item ); + break; + case InlineItem::LmOnNext: + writeLmOnNext( item ); + break; + case InlineItem::LmOnLagBehind: + writeLmOnLagBehind( item ); + break; + case InlineItem::LmSwitch: + writeLmSwitch( item ); + break; + + case InlineItem::LmInitAct: + out << "<init_act></init_act>"; + break; + case InlineItem::LmInitTokStart: + out << "<init_tokstart></init_tokstart>"; + break; + case InlineItem::LmSetTokStart: + out << "<set_tokstart></set_tokstart>"; + break; + + /* Stubbed. */ + case InlineItem::Ncall: + case InlineItem::NcallExpr: + case InlineItem::Nret: + case InlineItem::Nbreak: + case InlineItem::Stmt: + case InlineItem::Subst: + case InlineItem::NfaWrapAction: + case InlineItem::NfaWrapConds: + break; + } + } +} + + +void XMLCodeGen::writeAction( Action *action ) +{ + out << " <action id=\"" << action->actionId << "\""; + if ( !action->name.empty() ) + out << " name=\"" << action->name << "\""; + out << " line=\"" << action->loc.line << "\" col=\"" << action->loc.col << "\">"; + writeInlineList( action->inlineList ); + out << "</action>\n"; +} + +void xmlEscapeHost( std::ostream &out, const char *data, long len ) +{ + const char *end = data + len; + while ( data != end ) { + switch ( *data ) { + case '<': out << "&lt;"; break; + case '>': out << "&gt;"; break; + case '&': out << "&amp;"; break; + default: out << *data; break; + } + data += 1; + } +} + +void XMLCodeGen::writeStateActions( StateAp *state ) +{ + RedActionTable *toStateActions = 0; + if ( state->toStateActionTable.length() > 0 ) + toStateActions = actionTableMap.find( state->toStateActionTable ); + + RedActionTable *fromStateActions = 0; + if ( 
state->fromStateActionTable.length() > 0 ) + fromStateActions = actionTableMap.find( state->fromStateActionTable ); + + /* EOF actions go out here only if the state has no eof target. If it has + * an eof target then an eof transition will be used instead. */ + RedActionTable *eofActions = 0; + if ( state->eofTarget == 0 && state->eofActionTable.length() > 0 ) + eofActions = actionTableMap.find( state->eofActionTable ); + + if ( toStateActions != 0 || fromStateActions != 0 || eofActions != 0 ) { + out << " <state_actions>"; + if ( toStateActions != 0 ) + out << toStateActions->id; + else + out << "x"; + + if ( fromStateActions != 0 ) + out << " " << fromStateActions->id; + else + out << " x"; + + if ( eofActions != 0 ) + out << " " << eofActions->id; + else + out << " x"; + + out << "</state_actions>\n"; + } +} + +void XMLCodeGen::writeStateList() +{ + /* Write the list of states. */ + out << " <state_list length=\"" << fsm->stateList.length() << "\">\n"; + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + out << " <state id=\"" << st->alg.stateNum << "\""; + if ( st->isFinState() ) + out << " final=\"t\""; + out << ">\n"; + + writeStateActions( st ); + writeEofTrans( st ); + writeTransList( st ); + + out << " </state>\n"; + + if ( !st.last() ) + out << "\n"; + } + out << " </state_list>\n"; +} + +bool XMLCodeGen::writeNameInst( NameInst *nameInst ) +{ + bool written = false; + if ( nameInst->parent != 0 ) + written = writeNameInst( nameInst->parent ); + + if ( !nameInst->name.empty() ) { + if ( written ) + out << '_'; + out << nameInst->name; + written = true; + } + + return written; +} + +void XMLCodeGen::writeEntryPoints() +{ + /* List of entry points other than start state. 
*/ + if ( fsm->entryPoints.length() > 0 || pd->lmRequiresErrorState ) { + out << " <entry_points"; + if ( pd->lmRequiresErrorState ) + out << " error=\"t\""; + out << ">\n"; + for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { + /* Get the name instantiation from nameIndex. */ + NameInst *nameInst = pd->nameIndex[en->key]; + StateAp *state = en->value; + out << " <entry name=\""; + writeNameInst( nameInst ); + out << "\">" << state->alg.stateNum << "</entry>\n"; + } + out << " </entry_points>\n"; + } +} + +void XMLCodeGen::writeMachine() +{ + /* Open the machine. */ + out << " <machine>\n"; + + /* Action tables. */ + reduceActionTables(); + + writeActionList(); + writeActionTableList(); + writeConditions(); + + /* Start state. */ + out << " <start_state>" << fsm->startState->alg.stateNum << + "</start_state>\n"; + + /* Error state. */ + if ( fsm->errState != 0 ) { + out << " <error_state>" << fsm->errState->alg.stateNum << + "</error_state>\n"; + } + + writeEntryPoints(); + writeStateList(); + + out << " </machine>\n"; +} + + +void XMLCodeGen::writeConditions() +{ + CondData *condData = fsm->ctx->condData; + if ( condData->condSpaceMap.length() > 0 ) { + long nextCondSpaceId = 0; + for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) + cs->condSpaceId = nextCondSpaceId++; + + out << " <cond_space_list length=\"" << condData->condSpaceMap.length() << "\">\n"; + for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) { + out << " <cond_space id=\"" << cs->condSpaceId << + "\" length=\"" << cs->condSet.length() << "\">"; + for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) + out << " " << (*csi)->actionId; + out << "</cond_space>\n"; + } + out << " </cond_space_list>\n"; + } +} + +void XMLCodeGen::writeExports() +{ + if ( pd->exportList.length() > 0 ) { + out << " <exports>\n"; + for ( ExportList::Iter exp = pd->exportList; exp.lte(); exp++ ) { + out << " <ex name=\"" << exp->name << "\">"; + writeKey( exp->key ); + 
out << "</ex>\n"; + } + out << " </exports>\n"; + } +} + +void XMLCodeGen::writeXML() +{ + /* Open the definition. */ + out << "<ragel_def name=\"" << fsmName << "\">\n"; + + /* Alphabet type. */ + out << " <alphtype>" << keyOps->alphType->internalName << "</alphtype>\n"; + + /* Getkey expression. */ + if ( pd->getKeyExpr != 0 ) { + out << " <getkey>"; + writeInlineList( pd->getKeyExpr ); + out << "</getkey>\n"; + } + + /* Access expression. */ + if ( pd->accessExpr != 0 ) { + out << " <access>"; + writeInlineList( pd->accessExpr ); + out << "</access>\n"; + } + + /* PrePush expression. */ + if ( pd->prePushExpr != 0 ) { + out << " <prepush>"; + writeInlineList( pd->prePushExpr->inlineList ); + out << "</prepush>\n"; + } + + /* PostPop expression. */ + if ( pd->postPopExpr != 0 ) { + out << " <postpop>"; + writeInlineList( pd->postPopExpr->inlineList ); + out << "</postpop>\n"; + } + + /* + * Variable expressions. + */ + + if ( pd->pExpr != 0 ) { + out << " <p_expr>"; + writeInlineList( pd->pExpr ); + out << "</p_expr>\n"; + } + + if ( pd->peExpr != 0 ) { + out << " <pe_expr>"; + writeInlineList( pd->peExpr ); + out << "</pe_expr>\n"; + } + + if ( pd->eofExpr != 0 ) { + out << " <eof_expr>"; + writeInlineList( pd->eofExpr ); + out << "</eof_expr>\n"; + } + + if ( pd->csExpr != 0 ) { + out << " <cs_expr>"; + writeInlineList( pd->csExpr ); + out << "</cs_expr>\n"; + } + + if ( pd->topExpr != 0 ) { + out << " <top_expr>"; + writeInlineList( pd->topExpr ); + out << "</top_expr>\n"; + } + + if ( pd->stackExpr != 0 ) { + out << " <stack_expr>"; + writeInlineList( pd->stackExpr ); + out << "</stack_expr>\n"; + } + + if ( pd->actExpr != 0 ) { + out << " <act_expr>"; + writeInlineList( pd->actExpr ); + out << "</act_expr>\n"; + } + + if ( pd->tokstartExpr != 0 ) { + out << " <tokstart_expr>"; + writeInlineList( pd->tokstartExpr ); + out << "</tokstart_expr>\n"; + } + + if ( pd->tokendExpr != 0 ) { + out << " <tokend_expr>"; + writeInlineList( pd->tokendExpr ); + out << 
"</tokend_expr>\n"; + } + + if ( pd->dataExpr != 0 ) { + out << " <data_expr>"; + writeInlineList( pd->dataExpr ); + out << "</data_expr>\n"; + } + + writeExports(); + + writeMachine(); + + out << + "</ragel_def>\n"; +} + +void InputData::writeLanguage( std::ostream &out ) +{ + out << " lang=\""; + switch ( hostLang->lang ) { + case HostLang::C: out << "C"; break; + case HostLang::D: out << "D"; break; + case HostLang::Go: out << "Go"; break; + case HostLang::Java: out << "Java"; break; + case HostLang::Ruby: out << "Ruby"; break; + case HostLang::CSharp: out << "C#"; break; + case HostLang::OCaml: out << "OCaml"; break; + case HostLang::Crack: out << "Crack"; break; + case HostLang::Asm: out << "ASM"; break; + case HostLang::Rust: out << "Rust"; break; + case HostLang::Julia: out << "Julia"; break; + case HostLang::JS: out << "JavaScript"; break; + } + out << "\""; +} + +void InputData::writeXML( std::ostream &out ) +{ + out << "<ragel version=\"" VERSION "\" filename=\"" << inputFileName << "\""; + writeLanguage( out ); + out << ">\n"; + + for ( ParseDataDict::Iter pdel = parseDataDict; pdel.lte(); pdel++ ) { + ParseData *pd = pdel->value; + if ( pd->instanceList.length() > 0 ) + pd->generateXML( *outStream ); + } + + out << "</ragel>\n"; +} diff --git a/ragel/xml.h b/ragel/xml.h new file mode 100644 index 00000000..60135055 --- /dev/null +++ b/ragel/xml.h @@ -0,0 +1,81 @@ +/* + * Copyright 2005-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be 
included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _XMLCODEGEN_H +#define _XMLCODEGEN_H + +#if 0 + +#include <iostream> +#include "avltree.h" +#include "fsmgraph.h" +#include "parsedata.h" +#include "redfsm.h" +#include "gendata.h" + +class XMLCodeGen : protected RedBase +{ +public: + XMLCodeGen( std::string fsmName, int machineId, FsmGbl *id, PdBase *pd, FsmAp *fsm, std::ostream &out ); + + void writeXML( ); + +private: + void writeStateActions( StateAp *state ); + void writeStateList(); + + void writeKey( Key key ); + void writeText( InlineItem *item ); + void writeGoto( InlineItem *item ); + void writeGotoExpr( InlineItem *item ); + void writeCall( InlineItem *item ); + void writeCallExpr( InlineItem *item ); + void writeNext( InlineItem *item ); + void writeNextExpr( InlineItem *item ); + void writeEntry( InlineItem *item ); + void writeLmOnLast( InlineItem *item ); + void writeLmOnNext( InlineItem *item ); + void writeLmOnLagBehind( InlineItem *item ); + + void writeExports(); + bool writeNameInst( NameInst *nameInst ); + void writeEntryPoints(); + void writeConditions(); + void writeInlineList( InlineList *inlineList ); + void writeActionList(); + void writeActionTableList(); + void reduceTrans( TransAp *trans ); + void writeTransList( StateAp *state ); + void writeEofTrans( StateAp *state ); + void writeTrans( Key lowKey, Key highKey, TransAp *defTrans ); + void writeAction( Action *action ); + void writeLmSwitch( InlineItem *item ); + void 
writeMachine(); + void writeActionExec( InlineItem *item ); + + std::ostream &out; +}; + +#endif + +#endif + diff --git a/ragel/xmlparse.kh b/ragel/xmlparse.kh new file mode 100644 index 00000000..1b0b30ad --- /dev/null +++ b/ragel/xmlparse.kh @@ -0,0 +1,211 @@ +/* + * Copyright 2001-2007 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#ifndef _XMLPARSE_H
+#define _XMLPARSE_H
+
+#include "vector.h"
+#include "gendata.h"
+#include "buffer.h"
+#include <iostream>
+
+using std::istream;
+using std::ostream;
+
+/* Size in bytes of the XmlScanner::buf array. */
+#define XML_BUFSIZE 4096
+
+/* An attribute recorded as pointer/length pairs for its id and its value.
+ * NOTE(review): the pointers presumably refer into the scanner's buffer
+ * (XmlScanner::adjustAttrPointers shifts both by a distance) -- confirm. */
+struct AttrMarker
+{
+	char *id;
+	int idLen;
+	char *value;
+	int valueLen;
+};
+
+/* An attribute as an id/value string pair. The id is compared with strcmp in
+ * XMLTag::findAttr, so it must be NUL-terminated. */
+struct Attribute
+{
+	char *id;
+	char *value;
+};
+
+typedef Vector<AttrMarker> AttrMkList;
+typedef Vector<Attribute> AttrList;
+struct XMLTagHashPair;
+
+/* One opening or closing XML tag handed to the parser. */
+struct XMLTag
+{
+	enum TagType { Open, Close };
+
+	/* Content and the attribute list start out null. */
+	XMLTag( XMLTagHashPair *tagId, TagType type ) :
+		tagId(tagId), type(type),
+		content(0), attrList(0) {}
+
+	/* Linear search of the attribute list for an attribute whose id equals
+	 * the given string. Returns null when the tag has no attribute list or
+	 * no attribute matches.
+	 * NOTE(review): strcmp is used here but this header does not include
+	 * <string.h> itself; presumably it arrives via an earlier include --
+	 * confirm. */
+	Attribute *findAttr( const char *id )
+	{
+		if ( attrList != 0 ) {
+			for ( AttrList::Iter attr = *attrList; attr.lte(); attr++ ) {
+				if ( strcmp( id, attr->id ) == 0 )
+					return attr;
+			}
+		}
+		return 0;
+	}
+
+	/* Hash entry naming the tag. The parser treats a null tagId as an
+	 * unknown tag. */
+	XMLTagHashPair *tagId;
+	TagType type;
+
+	/* Content is associated with closing tags. */
+	char *content;
+
+	/* Attribute lists are associated with opening tags. */
+	AttrList *attrList;
+};
+
+
+/* Pairs a tag name with the token id passed to the parser for that tag. */
+struct XMLTagHashPair
+{
+	const char *name;
+	int id;
+};
+
+struct Token;
+
+struct GenInlineItem;
+struct GenInlineList;
+
+struct LmSwitchVect;
+struct LmSwitchAction;
+
+struct XmlScanner
+{
+	XmlScanner( const char *fileName, istream &input );
+
+	int scan();
+	void adjustAttrPointers( int distance );
+	std::ostream &error();
+
+	const char *fileName;
+	istream &input;
+
+	/* Scanner State. 
*/ + int cs, act, have, curline, curcol; + char *ts, *te; + char *p, *pe; + int done; + + /* Token data */ + char *data; + int data_len; + int value; + AttrMkList attrMkList; + Buffer buffer; + char *tag_id_start; + int tag_id_len; + int token_col, token_line; + + char buf[XML_BUFSIZE]; +}; + + +struct XmlParser +{ + %%{ + parser XmlParser; + + token TAG_unknown, TAG_ragel, TAG_ragel_def, TAG_host, TAG_state_list, + TAG_state, TAG_trans_list, TAG_t, TAG_machine, TAG_start_state, + TAG_error_state, TAG_action_list, TAG_action_table_list, + TAG_action, TAG_action_table, TAG_alphtype, TAG_element, + TAG_getkey, TAG_state_actions, TAG_entry_points, TAG_sub_action, + TAG_cond_space_list, TAG_cond_space, TAG_cond_list, TAG_c, + TAG_exports, TAG_ex; + + # Inline block tokens. + token TAG_text, TAG_goto, TAG_call, TAG_next, TAG_goto_expr, + TAG_call_expr, TAG_next_expr, TAG_ret, TAG_pchar, TAG_char, + TAG_hold, TAG_exec, TAG_curs, TAG_targs, TAG_entry, TAG_data, + TAG_lm_switch, TAG_init_act, TAG_set_act, TAG_set_tokend, + TAG_get_tokend, TAG_init_tokstart, TAG_set_tokstart; + + token TAG_write, TAG_access, TAG_break, TAG_arg, TAG_cs_expr; + + token TAG_p_expr, TAG_pe_expr, TAG_eof_expr, TAG_cs_expr, TAG_top_expr, + TAG_stack_expr, TAG_act_expr, TAG_tokstart_expr, TAG_tokend_expr, + TAG_data_expr, TAG_prepush, TAG_postpop, TAG_eof_t; + }%% + + %% write instance_data; + + void init(); + int parseLangEl( int type, const Token *token ); + + XmlParser( const char *sourceFileName, const char *xmlFileName, bool outputActive, bool wantComplete ) : + sourceFileName(sourceFileName), + fileName(xmlFileName), + outStream(0), + outputActive(outputActive), + wantComplete(wantComplete), + cgd(0) { } + + int token( int tokenId, Token &token ); + int token( int tokenId, int col, int line ); + int token( XMLTag *tag, int col, int line ); + + void openOutput(); + + /* Report an error encountered by the parser. 
*/ + ostream &warning( const InputLoc &loc ); + ostream &error(); + ostream &error( const InputLoc &loc ); + ostream &parser_error( int tokId, Token &token ); + ostream &source_error( const InputLoc &loc ); + + /* The name of the root section, this does not change during an include. */ + const char *sourceFileName; + const char *fileName; + ostream *outStream; + bool outputActive; + bool wantComplete; + + /* Collected during parsing. */ + char *attrKey; + char *attrValue; + int curAction; + int curActionTable; + int curTrans; + int curState; + int curCondSpace; + int curStateCond; + + CodeGenData *cgd; + CodeGenMap codeGenMap; + + Vector <char*> writeOptions; +}; + +%% write token_defs; + +int xml_parse( std::istream &input, const char *fileName, + bool outputActive, bool wantComplete, + XmlScanner &scanner, XmlParser &parser ); + +#endif diff --git a/ragel/xmlparse.kl b/ragel/xmlparse.kl new file mode 100644 index 00000000..04d95b83 --- /dev/null +++ b/ragel/xmlparse.kl @@ -0,0 +1,1006 @@ +/* + * Copyright 2001-2007 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "xmlparse.h" +#include "common.h" +#include "gendata.h" +#include "version.h" +#include <iostream> +#include <stdlib.h> + +using std::ostream; +using std::istream; +using std::cerr; +using std::endl; + +Key readKey( char *td, char **end ); +long readOffsetPtr( char *td, char **end ); +unsigned long readLength( char *td ); + +struct Token +{ + XMLTag *tag; + InputLoc loc; +}; + +%%{ + +parser XmlParser; + +include "xmlparse.kh"; + +start: tag_ragel; +start: + final { + /* If we get no input the assumption is that the frontend died and + * emitted an error. This forces the backend to return a non-zero + * exit status, but does not print an error. */ + gblErrorCount += 1; + }; + +tag_ragel: tag_ragel_head ragel_def_list host_or_write_list '/' TAG_ragel; + +tag_ragel_head: TAG_ragel + final { + /* Check version used to generated the intermediate file. */ + Attribute *versionAttr = $1->tag->findAttr( "version" ); + if ( versionAttr == 0 ) + error($1->loc) << "tag <ragel> requires a version attribute" << endp; + if ( strcmp( versionAttr->value, VERSION ) != 0 ) + error($1->loc) << "version mismatch between frontend and backend" << endp; + + /* Check for file name attribute. */ + Attribute *fileNameAttr = $1->tag->findAttr( "filename" ); + if ( fileNameAttr == 0 ) + error($1->loc) << "tag <ragel> requires a filename attribute" << endp; + sourceFileName = fileNameAttr->value; + + /* Check for language attribute. 
*/ + Attribute *langAttr = $1->tag->findAttr( "lang" ); + if ( langAttr == 0 ) + error($1->loc) << "tag <ragel> requires a lang attribute" << endp; + + if ( generateDot ) + outStream = dotOpenOutput( sourceFileName ); + else if ( strcmp( langAttr->value, "C" ) == 0 ) { + hostLang = &hostLangC; + outStream = cdOpenOutput( sourceFileName ); + } + else if ( strcmp( langAttr->value, "D" ) == 0 ) { + hostLang = &hostLangD; + outStream = cdOpenOutput( sourceFileName ); + } + else if ( strcmp( langAttr->value, "Java" ) == 0 ) { + hostLang = &hostLangJava; + outStream = javaOpenOutput( sourceFileName ); + } + else if ( strcmp( langAttr->value, "Ruby" ) == 0 ) { + hostLang = &hostLangRuby; + outStream = rubyOpenOutput( sourceFileName ); + } + else if ( strcmp( langAttr->value, "C#" ) == 0 ) { + hostLang = &hostLangCSharp; + outStream = csharpOpenOutput( sourceFileName ); + } + else { + error($1->loc) << "expecting lang attribute to be " + "one of C, D, Java, Ruby or C#" << endp; + } + }; + +ragel_def_list: ragel_def_list ragel_def; +ragel_def_list: ; + +host_or_write_list: host_or_write_list host_or_write; +host_or_write_list: ; + +host_or_write: tag_host; +host_or_write: tag_write; + +tag_host: + TAG_host '/' TAG_host + final { + Attribute *lineAttr = $1->tag->findAttr( "line" ); + if ( lineAttr == 0 ) + error($1->loc) << "tag <host> requires a line attribute" << endp; + else { + int line = atoi( lineAttr->value ); + if ( outputActive ) + lineDirective( *outStream, sourceFileName, line ); + } + + if ( outputActive ) + *outStream << $3->tag->content; + }; + +ragel_def: + tag_ragel_def_head ragel_def_item_list '/' TAG_ragel_def + final { + /* Do this before distributing transitions out to singles and defaults + * makes life easier. */ + cgd->redFsm->maxKey = cgd->findMaxKey(); + + cgd->redFsm->assignActionLocs(); + + /* Find the first final state (The final state with the lowest id). */ + cgd->redFsm->findFirstFinState(); + + /* Call the user's callback. 
*/ + cgd->finishRagelDef(); + }; + +tag_ragel_def_head: TAG_ragel_def + final { + char *fsmName = 0; + Attribute *nameAttr = $1->tag->findAttr( "name" ); + if ( nameAttr != 0 ) { + fsmName = nameAttr->value; + + CodeGenMapEl *mapEl = codeGenMap.find( fsmName ); + if ( mapEl != 0 ) + cgd = mapEl->value; + else { + cgd = makeCodeGen( sourceFileName, fsmName, *outStream, wantComplete ); + codeGenMap.insert( fsmName, cgd ); + } + } + else { + cgd = makeCodeGen( sourceFileName, fsmName, + *outStream, wantComplete ); + } + + ::keyOps = &cgd->thisKeyOps; + }; + +ragel_def_item_list: ragel_def_item_list ragel_def_item; +ragel_def_item_list: ; + +ragel_def_item: tag_alph_type; +ragel_def_item: tag_getkey_expr; +ragel_def_item: tag_access_expr; +ragel_def_item: tag_prepush_expr; +ragel_def_item: tag_postpop_expr; +ragel_def_item: tag_export_list; +ragel_def_item: tag_machine; +ragel_def_item: tag_p_expr; +ragel_def_item: tag_pe_expr; +ragel_def_item: tag_eof_expr; +ragel_def_item: tag_cs_expr; +ragel_def_item: tag_top_expr; +ragel_def_item: tag_stack_expr; +ragel_def_item: tag_act_expr; +ragel_def_item: tag_tokstart_expr; +ragel_def_item: tag_tokend_expr; +ragel_def_item: tag_data_expr; + +tag_export_list: TAG_exports export_list '/' TAG_exports; + +export_list: export_list tag_export; +export_list: ; + +tag_export: TAG_ex '/' TAG_ex + final { + Attribute *nameAttr = $1->tag->findAttr( "name" ); + if ( nameAttr == 0 ) + error($1->loc) << "tag <ex> requires a name attribute" << endp; + else { + char *td = $3->tag->content; + Key exportKey = readKey( td, &td ); + cgd->exportList.append( new Export( nameAttr->value, exportKey ) ); + } + }; + +tag_alph_type: TAG_alphtype '/' TAG_alphtype + final { + if ( ! 
cgd->setAlphType( $3->tag->content ) ) + error($1->loc) << "tag <alphtype> specifies unknown alphabet type" << endp; + }; + +tag_getkey_expr: TAG_getkey inline_list '/' TAG_getkey + final { + cgd->getKeyExpr = $2->inlineList; + }; + +tag_access_expr: TAG_access inline_list '/' TAG_access + final { + cgd->accessExpr = $2->inlineList; + }; + +tag_prepush_expr: TAG_prepush inline_list '/' TAG_prepush + final { + cgd->prePushExpr = $2->inlineList; + }; + +tag_postpop_expr: TAG_postpop inline_list '/' TAG_postpop + final { + cgd->postPopExpr = $2->inlineList; + }; + +tag_p_expr: TAG_p_expr inline_list '/' TAG_p_expr + final { cgd->pExpr = $2->inlineList; }; +tag_pe_expr: TAG_pe_expr inline_list '/' TAG_pe_expr + final { cgd->peExpr = $2->inlineList; }; +tag_eof_expr: TAG_eof_expr inline_list '/' TAG_eof_expr + final { cgd->eofExpr = $2->inlineList; }; +tag_cs_expr: TAG_cs_expr inline_list '/' TAG_cs_expr + final { cgd->csExpr = $2->inlineList; }; +tag_top_expr: TAG_top_expr inline_list '/' TAG_top_expr + final { cgd->topExpr = $2->inlineList; }; +tag_stack_expr: TAG_stack_expr inline_list '/' TAG_stack_expr + final { cgd->stackExpr = $2->inlineList; }; +tag_act_expr: TAG_act_expr inline_list '/' TAG_act_expr + final { cgd->actExpr = $2->inlineList; }; +tag_tokstart_expr: TAG_tokstart_expr inline_list '/' TAG_tokstart_expr + final { cgd->tokstartExpr = $2->inlineList; }; +tag_tokend_expr: TAG_tokend_expr inline_list '/' TAG_tokend_expr + final { cgd->tokendExpr = $2->inlineList; }; +tag_data_expr: TAG_data_expr inline_list '/' TAG_data_expr + final { cgd->dataExpr = $2->inlineList; }; + + +tag_write: tag_write_head write_option_list '/' TAG_write + final { + /* Terminate the options list and call the write statement handler. */ + writeOptions.append(0); + cgd->writeStatement( $1->loc, writeOptions.length()-1, writeOptions.data ); + + /* Clear the options in prep for the next write statement. 
*/ + writeOptions.empty(); + }; + +nonterm tag_write_head +{ + InputLoc loc; +}; + +tag_write_head: TAG_write + final { + Attribute *nameAttr = $1->tag->findAttr( "def_name" ); + Attribute *lineAttr = $1->tag->findAttr( "line" ); + Attribute *colAttr = $1->tag->findAttr( "col" ); + + if ( nameAttr == 0 ) + error($1->loc) << "tag <write> requires a def_name attribute" << endp; + if ( lineAttr == 0 ) + error($1->loc) << "tag <write> requires a line attribute" << endp; + if ( colAttr == 0 ) + error($1->loc) << "tag <write> requires a col attribute" << endp; + + if ( nameAttr != 0 && lineAttr != 0 && colAttr != 0 ) { + $$->loc.line = atoi(lineAttr->value); + $$->loc.col = atoi(colAttr->value); + + CodeGenMapEl *mapEl = codeGenMap.find( nameAttr->value ); + if ( mapEl == 0 ) { + source_error($$->loc) << "write statement given " + "but there are no machine instantiations" << endp; + } + else { + cgd = mapEl->value; + ::keyOps = &cgd->thisKeyOps; + } + } + }; + + +write_option_list: write_option_list tag_arg; +write_option_list: ; + +nonterm tag_arg +{ + char *option; +}; + +tag_arg: TAG_arg '/' TAG_arg + final { + writeOptions.append( $3->tag->content ); + }; + +tag_machine: tag_machine_head machine_item_list '/' TAG_machine + final { + cgd->closeMachine(); + }; + +tag_machine_head: TAG_machine + final { + cgd->createMachine(); + }; + +machine_item_list: machine_item_list machine_item; +machine_item_list: ; + +machine_item: tag_start_state; +machine_item: tag_error_state; +machine_item: tag_entry_points; +machine_item: tag_state_list; +machine_item: tag_action_list; +machine_item: tag_action_table_list; +machine_item: tag_cond_space_list; + +# +# States. 
+# + +tag_start_state: TAG_start_state '/' TAG_start_state + final { + unsigned long startState = strtoul( $3->tag->content, 0, 10 ); + cgd->setStartState( startState ); + }; + +tag_error_state: TAG_error_state '/' TAG_error_state + final { + unsigned long errorState = strtoul( $3->tag->content, 0, 10 ); + cgd->setErrorState( errorState ); + }; + +tag_entry_points: TAG_entry_points entry_point_list '/' TAG_entry_points + final { + Attribute *errorAttr = $1->tag->findAttr( "error" ); + if ( errorAttr != 0 ) + cgd->setForcedErrorState(); + }; + +entry_point_list: entry_point_list tag_entry; +entry_point_list: ; + +tag_entry: TAG_entry '/' TAG_entry + final { + Attribute *nameAttr = $1->tag->findAttr( "name" ); + if ( nameAttr == 0 ) { + error($1->loc) << "tag <entry_points>::<entry> " + "requires a name attribute" << endp; + } + else { + char *data = $3->tag->content; + unsigned long entry = strtoul( data, &data, 10 ); + cgd->addEntryPoint( nameAttr->value, entry ); + } + }; + +tag_state_list: tag_state_list_head state_list '/' TAG_state_list; + +tag_state_list_head: TAG_state_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error($1->loc) << "tag <state_list> requires a length attribute" << endp; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initStateList( length ); + curState = 0; + } + }; + +state_list: state_list tag_state; +state_list: ; + +tag_state: TAG_state state_item_list '/' TAG_state + final { + Attribute *idAttr = $1->tag->findAttr( "id" ); + if ( idAttr == 0 ) + error($1->loc) << "tag <state> requires an id attribute" << endp; + else { + int id = atoi( idAttr->value ); + cgd->setId( curState, id ); + } + + Attribute *lengthAttr = $1->tag->findAttr( "final" ); + if ( lengthAttr != 0 ) + cgd->setFinal( curState ); + curState += 1; + }; + +state_item_list: state_item_list state_item; +state_item_list: ; + +state_item: tag_state_actions; +state_item: tag_eof_t; +state_item: 
tag_state_cond_list; +state_item: tag_trans_list; + +tag_state_actions: TAG_state_actions '/' TAG_state_actions + final { + char *ad = $3->tag->content; + + long toStateAction = readOffsetPtr( ad, &ad ); + long fromStateAction = readOffsetPtr( ad, &ad ); + long eofAction = readOffsetPtr( ad, &ad ); + + cgd->setStateActions( curState, toStateAction, + fromStateAction, eofAction ); + }; + +tag_eof_t: TAG_eof_t '/' TAG_eof_t + final { + char *et = $3->tag->content; + long targ = readOffsetPtr( et, &et ); + long eofAction = readOffsetPtr( et, &et ); + + cgd->setEofTrans( curState, targ, eofAction ); + }; + +tag_state_cond_list: tag_state_cond_list_head state_cond_list '/' TAG_cond_list; + +tag_state_cond_list_head: TAG_cond_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error($1->loc) << "tag <cond_list> requires a length attribute" << endp; + else { + ulong length = readLength( lengthAttr->value ); + cgd->initStateCondList( curState, length ); + curStateCond = 0; + } + }; + +state_cond_list: state_cond_list state_cond; +state_cond_list: ; + +state_cond: TAG_c '/' TAG_c + final { + char *td = $3->tag->content; + Key lowKey = readKey( td, &td ); + Key highKey = readKey( td, &td ); + long condId = readOffsetPtr( td, &td ); + cgd->addStateCond( curState, lowKey, highKey, condId ); + }; + +tag_trans_list: tag_trans_list_head trans_list '/' TAG_trans_list + final { + cgd->finishTransList( curState ); + }; + +tag_trans_list_head: TAG_trans_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error($1->loc) << "tag <trans_list> requires a length attribute" << endp; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initTransList( curState, length ); + curTrans = 0; + } + }; + +trans_list: trans_list tag_trans; +trans_list: ; + +tag_trans: TAG_t '/' TAG_t + final { + char *td = $3->tag->content; + Key lowKey = readKey( td, &td ); + Key highKey = readKey( 
td, &td ); + long targ = readOffsetPtr( td, &td ); + long action = readOffsetPtr( td, &td ); + + cgd->newTrans( curState, curTrans++, lowKey, highKey, targ, action ); + }; + +# +# Action Lists. +# + +tag_action_list: tag_action_list_head action_list '/' TAG_action_list; + +tag_action_list_head: TAG_action_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error($1->loc) << "tag <action_list> requires a length attribute" << endp; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initActionList( length ); + curAction = 0; + } + }; + +action_list: action_list tag_action; +action_list: ; + +# +# Actions. +# + +tag_action: TAG_action inline_list '/' TAG_action + final { + Attribute *lineAttr = $1->tag->findAttr( "line" ); + Attribute *colAttr = $1->tag->findAttr( "col" ); + Attribute *nameAttr = $1->tag->findAttr( "name" ); + if ( lineAttr == 0 || colAttr == 0) + error($1->loc) << "tag <action> requires a line and col attributes" << endp; + else { + unsigned long line = strtoul( lineAttr->value, 0, 10 ); + unsigned long col = strtoul( colAttr->value, 0, 10 ); + + char *name = 0; + if ( nameAttr != 0 ) + name = nameAttr->value; + + cgd->newAction( curAction++, name, line, col, $2->inlineList ); + } + }; + +nonterm inline_list +{ + GenInlineList *inlineList; +}; + + +inline_list: inline_list inline_item + final { + /* Append the item to the list, return the list. */ + $1->inlineList->append( $2->inlineItem ); + $$->inlineList = $1->inlineList; + }; + +inline_list: + final { + /* Start with empty list. 
*/ + $$->inlineList = new GenInlineList; + }; + +nonterm inline_item_type +{ + GenInlineItem *inlineItem; +}; + +nonterm inline_item uses inline_item_type; + +inline_item: tag_text final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_goto final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_call final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_next final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_goto_expr final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_call_expr final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_next_expr final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_ret final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_break final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_pchar final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_char final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_hold final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_exec final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_curs final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_targs final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_il_entry final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_init_tokstart final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_init_act final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_get_tokend final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_set_tokstart final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_set_tokend final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_set_act final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_sub_action final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_lm_switch final { $$->inlineItem = $1->inlineItem; }; + +nonterm tag_text uses inline_item_type; +nonterm tag_goto uses inline_item_type; +nonterm tag_call uses inline_item_type; +nonterm tag_next uses inline_item_type; 
+nonterm tag_goto_expr uses inline_item_type; +nonterm tag_call_expr uses inline_item_type; +nonterm tag_next_expr uses inline_item_type; +nonterm tag_ret uses inline_item_type; +nonterm tag_break uses inline_item_type; +nonterm tag_pchar uses inline_item_type; +nonterm tag_char uses inline_item_type; +nonterm tag_hold uses inline_item_type; +nonterm tag_exec uses inline_item_type; +nonterm tag_curs uses inline_item_type; +nonterm tag_targs uses inline_item_type; +nonterm tag_il_entry uses inline_item_type; +nonterm tag_init_tokstart uses inline_item_type; +nonterm tag_init_act uses inline_item_type; +nonterm tag_get_tokend uses inline_item_type; +nonterm tag_set_tokstart uses inline_item_type; +nonterm tag_set_tokend uses inline_item_type; +nonterm tag_set_act uses inline_item_type; +nonterm tag_sub_action uses inline_item_type; +nonterm tag_lm_switch uses inline_item_type; + +tag_text: TAG_text '/' TAG_text + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Text ); + $$->inlineItem->data = $3->tag->content; + }; + +tag_goto: TAG_goto '/' TAG_goto + final { + int targ = strtol( $3->tag->content, 0, 10 ); + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Goto ); + $$->inlineItem->targId = targ; + }; + +tag_call: TAG_call '/' TAG_call + final { + int targ = strtol( $3->tag->content, 0, 10 ); + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Call ); + $$->inlineItem->targId = targ; + }; + +tag_next: TAG_next '/' TAG_next + final { + int targ = strtol( $3->tag->content, 0, 10 ); + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Next ); + $$->inlineItem->targId = targ; + }; + +tag_goto_expr: TAG_goto_expr inline_list '/' TAG_goto_expr + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::GotoExpr ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_call_expr: TAG_call_expr inline_list '/' TAG_call_expr + final { + $$->inlineItem = new GenInlineItem( InputLoc(), 
GenInlineItem::CallExpr ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_next_expr: TAG_next_expr inline_list '/' TAG_next_expr + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::NextExpr ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_ret: TAG_ret '/' TAG_ret + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Ret ); + }; + +tag_break: TAG_break '/' TAG_break + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Break ); + }; + +tag_pchar: TAG_pchar '/' TAG_pchar + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::PChar ); + }; + +tag_char: TAG_char '/' TAG_char + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Char ); + }; + +tag_hold: TAG_hold '/' TAG_hold + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Hold ); + }; + +tag_exec: TAG_exec inline_list '/' TAG_exec + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Exec ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_curs: TAG_curs '/' TAG_curs + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Curs ); + }; + +tag_targs: TAG_targs '/' TAG_targs + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Targs ); + }; + +tag_il_entry: TAG_entry '/' TAG_entry + final { + int targ = strtol( $3->tag->content, 0, 10 ); + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Entry ); + $$->inlineItem->targId = targ; + }; + +tag_init_tokstart: TAG_init_tokstart '/' TAG_init_tokstart + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmInitTokStart ); + }; + +tag_init_act: TAG_init_act '/' TAG_init_act + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmInitAct ); + }; + +tag_get_tokend: TAG_get_tokend '/' TAG_get_tokend + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmGetTokEnd ); + }; + 
+tag_set_tokstart: TAG_set_tokstart '/' TAG_set_tokstart + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmSetTokStart ); + cgd->hasLongestMatch = true; + }; + +tag_set_tokend: TAG_set_tokend '/' TAG_set_tokend + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmSetTokEnd ); + $$->inlineItem->offset = strtol( $3->tag->content, 0, 10 ); + }; + +tag_set_act: TAG_set_act '/' TAG_set_act + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmSetActId ); + $$->inlineItem->lmId = strtol( $3->tag->content, 0, 10 ); + }; + +tag_sub_action: TAG_sub_action inline_list '/' TAG_sub_action + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::SubAction ); + $$->inlineItem->children = $2->inlineList; + }; + +# Action switches. +tag_lm_switch: TAG_lm_switch lm_action_list '/' TAG_lm_switch + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmSwitch ); + $$->inlineItem->children = $2->inlineList; + }; + +nonterm lm_action_list +{ + GenInlineList *inlineList; +}; + +lm_action_list: lm_action_list tag_inline_action + final { + $$->inlineList = $1->inlineList; + $$->inlineList->append( $2->inlineItem ); + }; +lm_action_list: + final { + $$->inlineList = new GenInlineList; + }; + +nonterm tag_inline_action uses inline_item_type; + +tag_inline_action: TAG_sub_action inline_list '/' TAG_sub_action + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::SubAction ); + $$->inlineItem->children = $2->inlineList; + + Attribute *idAttr = $1->tag->findAttr( "id" ); + if ( idAttr != 0 ) { + unsigned long id = strtoul( idAttr->value, 0, 10 ); + $$->inlineItem->lmId = id; + } + }; + +# +# Lists of Actions. 
+# + +tag_action_table_list: + tag_action_table_list_head action_table_list '/' TAG_action_table_list; + +tag_action_table_list_head: TAG_action_table_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) { + error($1->loc) << "tag <action_table_list> requires " + "a length attribute" << endp; + } + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initActionTableList( length ); + curActionTable = 0; + } + }; + +action_table_list: action_table_list tag_action_table; +action_table_list: ; + +tag_action_table: TAG_action_table '/' TAG_action_table + final { + /* Find the length of the action table. */ + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error($1->loc) << "tag <at> requires a length attribute" << endp; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + + /* Collect the action table. */ + RedAction *redAct = cgd->allActionTables + curActionTable; + redAct->actListId = curActionTable; + redAct->key.setAsNew( length ); + char *ptr = $3->tag->content; + int pos = 0; + while ( *ptr != 0 ) { + unsigned long actionId = strtoul( ptr, &ptr, 10 ); + redAct->key[pos].key = 0; + redAct->key[pos].value = cgd->allActions+actionId; + pos += 1; + } + + /* Insert into the action table map. */ + cgd->redFsm->actionMap.insert( redAct ); + } + + curActionTable += 1; + }; + +# +# Conditions. 
+# + +tag_cond_space_list: tag_cond_space_list_head cond_space_list '/' TAG_cond_space_list; + +tag_cond_space_list_head: TAG_cond_space_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) { + error($1->loc) << "tag <cond_space_list> " + "requires a length attribute" << endp; + } + else { + ulong length = readLength( lengthAttr->value ); + cgd->initCondSpaceList( length ); + curCondSpace = 0; + } + }; + +cond_space_list: cond_space_list tag_cond_space; +cond_space_list: tag_cond_space; + +tag_cond_space: TAG_cond_space '/' TAG_cond_space + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + Attribute *idAttr = $1->tag->findAttr( "id" ); + if ( lengthAttr == 0 ) + error($1->loc) << "tag <cond_space> requires a length attribute" << endp; + else { + if ( lengthAttr == 0 ) + error($1->loc) << "tag <cond_space> requires an id attribute" << endp; + else { + unsigned long condSpaceId = strtoul( idAttr->value, 0, 10 ); + ulong length = readLength( lengthAttr->value ); + + char *td = $3->tag->content; + Key baseKey = readKey( td, &td ); + + cgd->newCondSpace( curCondSpace, condSpaceId, baseKey ); + for ( ulong a = 0; a < length; a++ ) { + long actionOffset = readOffsetPtr( td, &td ); + cgd->condSpaceItem( curCondSpace, actionOffset ); + } + curCondSpace += 1; + } + } + }; + +}%% + +%%{ + write types; + write data; +}%% + +void XmlParser::init() +{ + %% write init; +} + +int XmlParser::parseLangEl( int type, const Token *token ) +{ + %% write exec; + return errCount == 0 ? 
0 : -1;
+}
+
+
+/* Parse an unsigned base-10 length from the start of td. */
+unsigned long readLength( char *td )
+{
+	return strtoul( td, 0, 10 );
+}
+
+/* Parse a base-10 key from td, signed or unsigned according to
+ * keyOps->isSigned. The position after the number is stored through end. */
+Key readKey( char *td, char **end )
+{
+	if ( keyOps->isSigned )
+		return Key( strtol( td, end, 10 ) );
+	else
+		return Key( strtoul( td, end, 10 ) );
+}
+
+/* Parse an offset from td. After skipping spaces and tabs, a literal 'x'
+ * yields -1 and, when end is non-null, sets *end just past the 'x'. Anything
+ * else is parsed as a signed base-10 number with strtol. */
+long readOffsetPtr( char *td, char **end )
+{
+	while ( *td == ' ' || *td == '\t' )
+		td++;
+
+	if ( *td == 'x' ) {
+		if ( end != 0 )
+			*end = td + 1;
+		return -1;
+	}
+
+	return strtol( td, end, 10 );
+}
+
+/* Write a "file:line:col: warning: " prefix to cerr and return cerr for the
+ * message text. Does not touch the error count. */
+ostream &XmlParser::warning( const InputLoc &loc )
+{
+	cerr << fileName << ":" << loc.line << ":" << loc.col << ": warning: ";
+	return cerr;
+}
+
+/* Count an error and write a "file:line:col: " prefix, using the xml file
+ * name, to cerr. Returns cerr for the message text. */
+ostream &XmlParser::error( const InputLoc &loc )
+{
+	gblErrorCount += 1;
+	assert( fileName != 0 );
+	cerr << fileName << ":" << loc.line << ":" << loc.col << ": ";
+	return cerr;
+}
+
+
+/* Count an error and write a location prefix for the given token. When the
+ * token carries a tag, the prefix names it, or says "unknown tag" if the tag
+ * has no tagId. The tokId argument is not used. */
+ostream &XmlParser::parser_error( int tokId, Token &token )
+{
+	gblErrorCount += 1;
+	assert( fileName != 0 );
+	cerr << fileName << ":" << token.loc.line << ":" << token.loc.col;
+	if ( token.tag != 0 ) {
+		if ( token.tag->tagId == 0 )
+			cerr << ": at unknown tag";
+		else
+			cerr << ": at tag <" << token.tag->tagId->name << ">";
+	}
+	cerr << ": ";
+
+	return cerr;
+}
+
+/* Same as error( loc ) but the prefix uses sourceFileName instead of the xml
+ * file name. */
+ostream &XmlParser::source_error( const InputLoc &loc )
+{
+	gblErrorCount += 1;
+	assert( sourceFileName != 0 );
+	cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": ";
+	return cerr;
+}
+
+
+/* Feed one token to the generated parser. A negative result from
+ * parseLangEl is reported as a parse error at the token. */
+int XmlParser::token( int tokenId, Token &tok )
+{
+	int res = parseLangEl( tokenId, &tok );
+	if ( res < 0 )
+		parser_error( tokenId, tok ) << "parse error" << endp;
+	return res;
+}
+
+/* Feed a token that has a location but no tag. */
+int XmlParser::token( int tokenId, int col, int line )
+{
+	Token tok;
+	tok.loc.col = col;
+	tok.loc.line = line;
+	tok.tag = 0;
+	return token( tokenId, tok );
+}
+
+/* Feed a tag to the parser. A closing tag is sent as a '/' token followed by
+ * the tag's own token, which is how the grammar rules match closing tags. */
+int XmlParser::token( XMLTag *tag, int col, int line )
+{
+	Token tok;
+	tok.loc.col = col;
+	tok.loc.line = line;
+	tok.tag = tag;
+
+	if ( tag->type == XMLTag::Close ) {
+		int res = token( '/', tok );
+		if ( res < 0 )
+			return res;
+	}
+
+	tok.tag = tag;
+	return 
token( tag->tagId != 0 ? tag->tagId->id : TAG_unknown, tok ); +} diff --git a/ragel/xmlscan.rl b/ragel/xmlscan.rl new file mode 100644 index 00000000..4e9ee4e2 --- /dev/null +++ b/ragel/xmlscan.rl @@ -0,0 +1,315 @@ +/* + * Copyright 2001-2007 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <iostream> +#include <string.h> +#include "vector.h" +#include "xmlparse.h" + +using std::istream; +using std::cout; +using std::cerr; +using std::endl; + +%%{ + machine XmlScanner; + write data; +}%% + +class Perfect_Hash +{ +private: + static inline unsigned int hash (const char *str, unsigned int len); + +public: + static struct XMLTagHashPair *in_word_set (const char *str, unsigned int len); +}; + +XmlScanner::XmlScanner( const char *fileName, istream &input ) : + fileName(fileName), + input(input), + curline(1), + curcol(1), + p(0), pe(0), + done(false), + data(0), data_len(0), + value(0) +{ + %%{ + machine XmlScanner; + write init; + }%% +} + +#define TK_NO_TOKEN (-1) +#define TK_ERR 1 +#define TK_SPACE 2 +#define TK_EOF 3 +#define TK_OpenTag 4 +#define TK_CloseTag 5 + +#define ret_tok( _tok ) token = (_tok); data = ts + +void XmlScanner::adjustAttrPointers( int distance ) +{ + for ( AttrMkList::Iter attr = attrMkList; attr.lte(); attr++ ) { + attr->id -= distance; + attr->value -= distance; + } +} + +/* There is no claim that this is a proper XML parser, but it is good + * enough for our purposes. */ +%%{ + machine XmlScanner; + + action colup { curcol++; } + action start_tok { token_col = curcol; token_line = curline; } + NL = '\n' @{ curcol = 0; curline++; }; + + WS = [\r\t ] | NL; + id = [_a-zA-Z][_a-zA-Z0-9]*; + literal = '"' ( [^"] | NL )* '"'; + + # Attribute identifiers. 
+ action start_attr_id { attr_id_start = p; } + action leave_attr_id { attr_id_len = p - attr_id_start; } + + attr_id = id >start_attr_id %leave_attr_id; + + # Attribute values + action start_attr_value { attr_value_start = p; } + action leave_attr_value + { + attr_value_len = p - attr_value_start; + + AttrMarker newAttr; + newAttr.id = attr_id_start; + newAttr.idLen = attr_id_len; + newAttr.value = attr_value_start; + newAttr.valueLen = attr_value_len; + attrMkList.append( newAttr ); + } + + attr_value = literal >start_attr_value %leave_attr_value; + + # Attribute list. + attribute = attr_id WS* '=' WS* attr_value WS*; + + # Tag identifiers. + action tag_id_start { tag_id_start = p; } + action leave_tag_id { tag_id_len = p - tag_id_start; } + + tag_id = id >tag_id_start %leave_tag_id; + + main := |* + # Tags + ( '<' WS* tag_id ( WS+ attribute* )? '>' ) >start_tok $colup + => { ret_tok( TK_OpenTag ); fbreak; }; + + ( '<' WS* '/' WS* tag_id WS* '>' ) >start_tok $colup + => { ret_tok( TK_CloseTag ); fbreak; }; + + # Data in between tags. + ( [^<&\0] | NL ) $colup + => { buffer.append( *p ); }; + + # Specials. + "&" $colup + => { buffer.append( '&' ); }; + "<" $colup + => { buffer.append( '<' ); }; + ">" $colup + => { buffer.append( '>' ); }; + + # EOF + 0 >start_tok => { ret_tok( TK_EOF ); fbreak; }; + + *|; +}%% + +int XmlScanner::scan( ) +{ + int token = TK_NO_TOKEN; + int space = 0, readlen = 0; + char *attr_id_start = 0; + char *attr_value_start = 0; + int attr_id_len = 0; + int attr_value_len = 0; + + attrMkList.empty(); + buffer.clear(); + + while ( 1 ) { + if ( p == pe ) { + //printf("scanner: need more data\n"); + + if ( ts == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. 
*/ + //printf("scanner: buffer broken mid token\n"); + have = pe - ts; + memmove( buf, ts, have ); + + int distance = ts - buf; + te -= distance; + tag_id_start -= distance; + attr_id_start -= distance; + attr_value_start -= distance; + adjustAttrPointers( distance ); + ts = buf; + } + + p = buf + have; + space = XML_BUFSIZE - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. */ + return TK_SPACE; + } + + if ( done ) { + //printf("scanner: end of file\n"); + p[0] = 0; + readlen = 1; + } + else { + input.read( p, space ); + readlen = input.gcount(); + if ( input.eof() ) { + //printf("scanner: setting done flag\n"); + done = 1; + } + } + + pe = p + readlen; + } + + %% write exec; + + if ( cs == XmlScanner_error ) + return TK_ERR; + + if ( token != TK_NO_TOKEN ) { + data_len = p - data; + return token; + } + } +} + +int xml_parse( std::istream &input, const char *fileName, + bool outputActive, bool wantComplete, + XmlScanner &scanner, XmlParser &parser ) +{ + while ( 1 ) { + int token = scanner.scan(); + if ( token == TK_NO_TOKEN ) { + cerr << "xmlscan: interal error: scanner returned NO_TOKEN" << endl; + exit(1); + } + else if ( token == TK_EOF ) { + parser.token( XmlParser_tk_eof, scanner.token_col, scanner.token_line ); + break; + } + else if ( token == TK_ERR ) { + scanner.error() << "scanner error" << endl; + break; + } + else if ( token == TK_SPACE ) { + scanner.error() << "scanner is out of buffer space" << endl; + break; + } + else { + /* All other tokens are either open or close tags. */ + XMLTagHashPair *tagId = Perfect_Hash::in_word_set( + scanner.tag_id_start, scanner.tag_id_len ); + + XMLTag *tag = new XMLTag( tagId, token == TK_OpenTag ? + XMLTag::Open : XMLTag::Close ); + + if ( tagId != 0 ) { + /* Get attributes for open tags. 
*/ + if ( token == TK_OpenTag && scanner.attrMkList.length() > 0 ) { + tag->attrList = new AttrList; + for ( AttrMkList::Iter attr = scanner.attrMkList; + attr.lte(); attr++ ) + { + Attribute newAttr; + newAttr.id = new char[attr->idLen+1]; + memcpy( newAttr.id, attr->id, attr->idLen ); + newAttr.id[attr->idLen] = 0; + + /* Exclude the surrounding quotes. */ + newAttr.value = new char[attr->valueLen-1]; + memcpy( newAttr.value, attr->value+1, attr->valueLen-2 ); + newAttr.value[attr->valueLen-2] = 0; + + tag->attrList->append( newAttr ); + } + } + + /* Get content for closing tags. */ + if ( token == TK_CloseTag ) { + switch ( tagId->id ) { + case TAG_host: case TAG_arg: + case TAG_t: case TAG_alphtype: + case TAG_text: case TAG_goto: + case TAG_call: case TAG_next: + case TAG_entry: case TAG_set_tokend: + case TAG_set_act: case TAG_start_state: + case TAG_error_state: case TAG_state_actions: + case TAG_action_table: case TAG_cond_space: + case TAG_c: case TAG_ex: case TAG_eof_t: + tag->content = new char[scanner.buffer.length+1]; + memcpy( tag->content, scanner.buffer.data, + scanner.buffer.length ); + tag->content[scanner.buffer.length] = 0; + break; + } + } + } + + #if 0 + cerr << "parser_driver: " << (tag->type == XMLTag::Open ? "open" : "close") << + ": " << (tag->tagId != 0 ? 
tag->tagId->name : "<unknown>") << endl; + if ( tag->attrList != 0 ) { + for ( AttrList::Iter attr = *tag->attrList; attr.lte(); attr++ ) + cerr << " " << attr->id << ": " << attr->value << endl; + } + if ( tag->content != 0 ) + cerr << " content: " << tag->content << endl; + #endif + + parser.token( tag, scanner.token_col, scanner.token_line ); + } + } + + return 0; +} + +std::ostream &XmlScanner::error() +{ + gblErrorCount += 1; + cerr << fileName << ":" << curline << ":" << curcol << ": "; + return cerr; +} diff --git a/ragel/xmltags.gperf b/ragel/xmltags.gperf new file mode 100644 index 00000000..1ca544f7 --- /dev/null +++ b/ragel/xmltags.gperf @@ -0,0 +1,95 @@ +/* + * Copyright 2005 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + +%{ +#include <string.h> +#include "xmlparse.h" +%} +%compare-strncmp +struct XMLTagHashPair; +%% +ragel, TAG_ragel +ragel_def, TAG_ragel_def +host, TAG_host +state_list, TAG_state_list +state, TAG_state +trans_list, TAG_trans_list +t, TAG_t +machine, TAG_machine +start_state, TAG_start_state +error_state, TAG_error_state +action_list, TAG_action_list +action, TAG_action +action_table_list, TAG_action_table_list +action_table, TAG_action_table +alphtype, TAG_alphtype +getkey, TAG_getkey +state_actions, TAG_state_actions +entry_points, TAG_entry_points +text, TAG_text +goto, TAG_goto +call, TAG_call +next, TAG_next +goto_expr, TAG_goto_expr +call_expr, TAG_call_expr +next_expr, TAG_next_expr +ret, TAG_ret +pchar, TAG_pchar +char, TAG_char +hold, TAG_hold +exec, TAG_exec +curs, TAG_curs +targs, TAG_targs +entry, TAG_entry +data, TAG_data +lm_switch, TAG_lm_switch +sub_action, TAG_sub_action +init_act, TAG_init_act +set_act, TAG_set_act +get_tokend, TAG_get_tokend +set_tokend, TAG_set_tokend +init_tokstart, TAG_init_tokstart +set_tokstart, TAG_set_tokstart +write, TAG_write +access, TAG_access +break, TAG_break +arg, TAG_arg +cond_space_list, TAG_cond_space_list +cond_space, TAG_cond_space +cond_list, TAG_cond_list +c, TAG_c +exports, TAG_exports +ex, TAG_ex +p_expr, TAG_p_expr +pe_expr, TAG_pe_expr +eof_expr, TAG_eof_expr +cs_expr, TAG_cs_expr +top_expr, TAG_top_expr +stack_expr, TAG_stack_expr +act_expr, TAG_act_expr +tokstart_expr, TAG_tokstart_expr +tokend_expr, TAG_tokend_expr +data_expr, TAG_data_expr +prepush, TAG_prepush +postpop, TAG_postpop +eof_t, TAG_eof_t |