diff options
Diffstat (limited to 'libfsm')
88 files changed, 34989 insertions, 0 deletions
diff --git a/libfsm/.exrc b/libfsm/.exrc new file mode 100644 index 00000000..412b360f --- /dev/null +++ b/libfsm/.exrc @@ -0,0 +1,28 @@ +if &cp | set nocp | endif +let s:cpo_save=&cpo +set cpo&vim +map <NL> j +map k +map Q gq +nmap gx <Plug>NetrwBrowseX +nnoremap <silent> <Plug>NetrwBrowseX :call netrw#NetrwBrowseX(expand("<cWORD>"),0)
+let &cpo=s:cpo_save +unlet s:cpo_save +set autoindent +set autowriteall +set backspace=2 +set fileencodings=ucs-bom,utf-8,default,latin1 +set helplang=en +set incsearch +set nojoinspaces +set makeprg=make\ -j4 +set printoptions=paper:letter +set ruler +set runtimepath=~/.vim,/var/lib/vim/addons,/usr/share/vim/vimfiles,/usr/share/vim/vim74,/usr/share/vim/vimfiles/after,/var/lib/vim/addons/after,~/.vim/after +set showcmd +set showmatch +set suffixes=.bak,~,.swp,.o,.info,.aux,.log,.dvi,.bbl,.blg,.brf,.cb,.ind,.idx,.ilg,.inx,.out,.toc +set viminfo='20,\"50 +set visualbell +set nowritebackup +" vim: set ft=vim : diff --git a/libfsm/.gitignore b/libfsm/.gitignore new file mode 100644 index 00000000..78db4968 --- /dev/null +++ b/libfsm/.gitignore @@ -0,0 +1,57 @@ +/tags +/Makefile +/Makefile.in +/rlscan.cc +/rlparse.cc +/rlparse.h +/version.h +/config.h +/config.h.in +/config.h.in~ +/ragel +/ragel.exe +/.deps +/stamp-h1 +/rlhc +/rlhc.c + +/*.lo + +# Parsing +/parse.c +/rlreduce.cc +/ldparse.c +/ldreduce.cc + +# Common testing file. +/tmp.rl +/tmp.c +/tmp.cc +/tmp.d +/tmp.go +/tmp.ps +/tmp.ml +/tmp.cmi +/tmp.cmx +/tmp.rs +/tmp.crk +/tmp.jl +/tmp +/input + +# The ragel frontend doesn't support OCaml lexical rules yet, so a util is +# needed. 
+/util.ml +/util.cmi +/util.cmx + +/libragel.a +/libragel.la +/libfsm.a +/libfsm.la +/.libs + +/CMakeFiles +/cmake_install.cmake + +/*.pack diff --git a/libfsm/CMakeLists.txt b/libfsm/CMakeLists.txt new file mode 100644 index 00000000..3e797981 --- /dev/null +++ b/libfsm/CMakeLists.txt @@ -0,0 +1,154 @@ +# Package name +set(_PACKAGE_NAME ragel) + +# Read project configuration from ../configure.ac file +file(STRINGS ../configure.ac _PROJECT_CONFIGS + REGEX "(RAGEL_VERSION=)|(RAGEL_PUBDATE=)") +foreach(_PROJECT_CONFIG ${_PROJECT_CONFIGS}) + if(_PROJECT_CONFIG MATCHES "RAGEL_VERSION=\"([^\"]+)") + string(STRIP "${CMAKE_MATCH_1}" RAGEL_VERSION) + endif() + if(_PROJECT_CONFIG MATCHES "RAGEL_PUBDATE=\"([^\"]+)") + string(STRIP "${CMAKE_MATCH_1}" RAGEL_PUBDATE) + endif() +endforeach() + +## Generate headers +configure_file(version.h.cmake.in version.h @ONLY) +configure_file(ragel-config.cmake.in + "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config.cmake" @ONLY) + +# Runtime headers +set(RUNTIME_HDR + action.h fsmgraph.h ragel.h common.h + gendata.h redfsm.h dot.h) + +# Other CMake modules +include(GNUInstallDirs) + +# libfsm +add_library(libfsm + buffer.h codegen.h + actloop.h actexp.h + tables.h + binary.h bingoto.h binbreak.h binvar.h + flat.h flatgoto.h flatbreak.h flatvar.h + switch.h switchgoto.h switchbreak.h switchvar.h + goto.h gotoloop.h gotoexp.h + ipgoto.h asm.h + idbase.cc fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc fsmgraph.cc + fsmap.cc fsmcond.cc fsmnfa.cc common.cc redfsm.cc gendata.cc + allocgen.cc codegen.cc + actexp.cc binvar.cc + tables.cc tabgoto.cc tabbreak.cc tabvar.cc + binary.cc bingoto.cc binbreak.cc actloop.cc + flat.cc flatgoto.cc flatbreak.cc flatvar.cc + switch.cc switchgoto.cc switchbreak.cc switchvar.cc + goto.cc gotoloop.cc gotoexp.cc ipgoto.cc + dot.cc asm.cc) + +target_include_directories(libfsm + PUBLIC + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}> + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/../src> + 
$<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/../aapl> + $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) + +set_target_properties(libfsm PROPERTIES + OUTPUT_NAME fsm) + +# libragel +add_library(libragel + # dist + parsedata.h parsetree.h inputdata.h pcheck.h reducer.h rlscan.h load.h + parsetree.cc longest.cc parsedata.cc inputdata.cc load.cc reducer.cc) + +if(BUILD_STANDALONE) + # libragel acts as an intermediate library so we can apply + # flags we want to apply to all ragel targets to libragel + # and they'll automatically propagate. This is a best effort + # to get `-static` placed sooner in the link line where it + # matters at least. + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + target_link_libraries(libragel PUBLIC -static) + else() + message(FATAL_ERROR "Unsupported toolset for standalone build.") + endif() +endif() + +target_link_libraries(libragel PRIVATE colm::libcolm) + +target_include_directories(libragel + PUBLIC + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}> + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/..> + $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}> + $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/..> + $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/../aapl> + $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) + +set_target_properties(libragel PROPERTIES + OUTPUT_NAME ragel) +# Use the FULL_ path so an absolute CMAKE_INSTALL_BINDIR is not prefixed a second time +set_property(TARGET libragel APPEND PROPERTY + COMPILE_DEFINITIONS BINDIR="${CMAKE_INSTALL_FULL_BINDIR}") + +# ragel program + +set(RAGEL_LM + rlparse.lm + ragel.lm + rlreduce.lm) + +add_custom_command(OUTPUT + "${CMAKE_CURRENT_BINARY_DIR}/parse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + DEPENDS ${RAGEL_LM} #$(COLM_BINDEP) + COMMAND colm::colm + ARGS -c -b rlparseC + -o "${CMAKE_CURRENT_BINARY_DIR}/parse.c" + -m "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc" + rlparse.lm + WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" VERBATIM) + +add_executable(ragel + main.cc + "${CMAKE_CURRENT_BINARY_DIR}/parse.c" + "${CMAKE_CURRENT_BINARY_DIR}/rlreduce.cc") + 
+target_link_libraries(ragel PRIVATE libragel libfsm) + +foreach(_SUBDIR host-ruby host-asm host-julia host-ocaml host-c host-d + host-csharp host-go host-java host-rust host-crack host-js) + add_subdirectory(${_SUBDIR}) +endforeach() + +if(${PROJECT_NAME}_MAKE_INSTALL) + if(NOT DEFINED CMAKE_INSTALL_CMAKEDIR) + set(CMAKE_INSTALL_CMAKEDIR + "${CMAKE_INSTALL_LIBDIR}/cmake/${_PACKAGE_NAME}" + CACHE STRING "CMake packages") + endif() + install(FILES ${RUNTIME_HDR} + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/ragel") + install(TARGETS libfsm libragel ragel + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") + install(EXPORT ${_PACKAGE_NAME}-targets + NAMESPACE ${_PACKAGE_NAME}:: + DESTINATION "${CMAKE_INSTALL_CMAKEDIR}") + export(EXPORT ${_PACKAGE_NAME}-targets + NAMESPACE ${_PACKAGE_NAME}:: + FILE "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-targets.cmake") + include(CMakePackageConfigHelpers) + write_basic_package_version_file( + "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config-version.cmake" + VERSION ${RAGEL_VERSION} + COMPATIBILITY AnyNewerVersion) + install(FILES + "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config.cmake" + "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config-version.cmake" + DESTINATION "${CMAKE_INSTALL_CMAKEDIR}") +endif() diff --git a/libfsm/Makefile.am b/libfsm/Makefile.am new file mode 100644 index 00000000..5a3761e9 --- /dev/null +++ b/libfsm/Makefile.am @@ -0,0 +1,46 @@ +# libfsm contains only the FSM construction code and the backend code +# generators for C, asm and cgil (Code Gen Intermediate Language) . It is +# useful for building state machine code generators in programs not connected +# to the ragel language. +lib_LTLIBRARIES = libfsm.la + +pkginclude_HEADERS = \ + action.h fsmgraph.h common.h \ + gendata.h redfsm.h dot.h + +# nodist_pkginclude_HEADERS = config.h + +# +# libfsm: state machine construction and direct code generation. 
+# +libfsm_la_CPPFLAGS = -I$(top_srcdir)/aapl + +dist_libfsm_la_SOURCES = \ + buffer.h codegen.h \ + actloop.h actexp.h \ + tables.h \ + binary.h bingoto.h binbreak.h binvar.h \ + flat.h flatgoto.h flatbreak.h flatvar.h \ + switch.h switchgoto.h switchbreak.h switchvar.h \ + goto.h gotoloop.h gotoexp.h \ + ipgoto.h asm.h \ + idbase.cc fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc fsmgraph.cc \ + fsmap.cc fsmcond.cc fsmnfa.cc common.cc redfsm.cc gendata.cc \ + allocgen.cc codegen.cc \ + actexp.cc binvar.cc \ + tables.cc tabgoto.cc tabbreak.cc tabvar.cc \ + binary.cc bingoto.cc binbreak.cc actloop.cc \ + flat.cc flatgoto.cc flatbreak.cc flatvar.cc \ + switch.cc switchgoto.cc switchbreak.cc switchvar.cc \ + goto.cc gotoloop.cc gotoexp.cc ipgoto.cc \ + dot.cc asm.cc + +nodist_libfsm_la_SOURCES = \ + version.h + +libfsm_la_LDFLAGS = -no-undefined + +if LINKER_NO_UNDEFINED +libfsm_la_LDFLAGS += -Wl,--no-undefined +endif + diff --git a/libfsm/actexp.cc b/libfsm/actexp.cc new file mode 100644 index 00000000..771d4623 --- /dev/null +++ b/libfsm/actexp.cc @@ -0,0 +1,218 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "actexp.h" +#include "redfsm.h" +#include "gendata.h" + +void ActExp::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->actListId + 1; + fromStateActions.value( act ); +} + +void ActExp::COND_ACTION( RedCondPair *cond ) +{ + int action = 0; + if ( cond->action != 0 ) + action = cond->action->actListId + 1; + condActions.value( action ); +} + +void ActExp::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->actListId + 1; + toStateActions.value( act ); +} + +void ActExp::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->actListId + 1; + eofActions.value( act ); +} + +void ActExp::NFA_PUSH_ACTION( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->push != 0 ) + act = targ->push->actListId+1; + nfaPushActions.value( act ); +} + +void ActExp::NFA_POP_TEST( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->popTest != 0 ) + act = targ->popTest->actListId+1; + nfaPopTrans.value( act ); +} + + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &ActExp::FROM_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numFromStateRefs > 0 ) { + /* Write the entry label. */ + out << "\t " << CASE( STR( redAct->actListId+1 ) ) << " {\n"; + + /* Write each action in the list of action items. 
*/ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( 0, false, false ) ); + out << "\n\t"; + } + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &ActExp::ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numTransRefs > 0 ) { + /* Write the entry label. */ + out << "\t " << CASE( STR( redAct->actListId+1 ) ) << " {\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( 0, false, false ) ); + out << "\n\t"; + } + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &ActExp::TO_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numToStateRefs > 0 ) { + /* Write the entry label. */ + out << "\t " << CASE( STR( redAct->actListId+1 ) ) << " {\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( 0, false, false ) ); + out << "\n\t"; + } + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &ActExp::EOF_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numEofRefs > 0 ) { + /* Write the entry label. */ + out << "\t " << CASE( STR( redAct->actListId+1 ) ) << " {\n"; + + /* Write each action in the list of action items. 
*/ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( 0, true, false ) ); + out << "\n\t"; + } + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + + +void ActExp::FROM_STATE_ACTIONS() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " switch ( " << ARR_REF( fromStateActions ) << "[" << vCS() << "] ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + "\n"; + } +} + +void ActExp::REG_ACTIONS( std::string cond ) +{ + out << + " switch ( " << ARR_REF( condActions ) << "[" << cond << "] ) {\n"; + ACTION_SWITCH() << + " }\n" + "\n"; +} +void ActExp::TO_STATE_ACTIONS() +{ + if ( redFsm->anyToStateActions() ) { + out << + " switch ( " << ARR_REF( toStateActions ) << "[" << vCS() << "] ) {\n"; + TO_STATE_ACTION_SWITCH() << + " }\n" + "\n"; + } +} + + +void ActExp::EOF_ACTIONS() +{ + if ( redFsm->anyEofActions() ) { + out << + " switch ( " << ARR_REF( eofActions ) << "[" << vCS() << "] ) {\n"; + EOF_ACTION_SWITCH() << + " }\n"; + } +} + +void ActExp::NFA_FROM_STATE_ACTION_EXEC() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " switch ( " << ARR_REF( fromStateActions ) << "[nfa_bp[nfa_len].state] ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + "\n"; + } +} + diff --git a/libfsm/actexp.h b/libfsm/actexp.h new file mode 100644 index 00000000..49165755 --- /dev/null +++ b/libfsm/actexp.h @@ -0,0 +1,62 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be 
included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _ACTEXP_H +#define _ACTEXP_H + +#include "binary.h" + +struct RedStateAp; +struct RedCondPair; +/* Action values written to the tables are action-list ids (actListId+1, 0 for none); each emitted switch has one case per referenced action list. */ +class ActExp + : public virtual Tables +{ +public: + ActExp( const CodeGenArgs &args ) + : + Tables( args ) + {} + + virtual void FROM_STATE_ACTION( RedStateAp *state ); + virtual void COND_ACTION( RedCondPair *cond ); + virtual void TO_STATE_ACTION( RedStateAp *state ); + virtual void EOF_ACTION( RedStateAp *state ); + + virtual void NFA_PUSH_ACTION( RedNfaTarg *targ ); + virtual void NFA_POP_TEST( RedNfaTarg *targ ); + + virtual std::ostream &FROM_STATE_ACTION_SWITCH(); + virtual std::ostream &ACTION_SWITCH(); + virtual std::ostream &TO_STATE_ACTION_SWITCH(); + virtual std::ostream &EOF_ACTION_SWITCH(); + + virtual void TO_STATE_ACTIONS(); + virtual void REG_ACTIONS( std::string cond ); + virtual void FROM_STATE_ACTIONS(); + virtual void EOF_ACTIONS(); + + virtual void NFA_FROM_STATE_ACTION_EXEC(); +}; + +#endif /* _ACTEXP_H */ + diff --git a/libfsm/action.h b/libfsm/action.h new file mode 100644 index 00000000..39169202 --- /dev/null +++ b/libfsm/action.h @@ -0,0 +1,116 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, 
publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _ACTION_H +#define _ACTION_H + +#include "fsmgraph.h" + +struct NameInst; +struct NameRef; +struct LongestMatch; +struct InlineList; + +/* + * Inline code tree. Every constructor zero-initializes the members it does not receive as arguments, in declaration order. + */ +struct InlineItem +{ + enum Type + { + Text, Goto, Call, Ncall, Next, GotoExpr, CallExpr, NcallExpr, NextExpr, Ret, Nret, + PChar, Char, Hold, Curs, Targs, Entry, Exec, Break, Nbreak, + LmSwitch, LmSetActId, LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, + LmInitAct, LmInitTokStart, LmSetTokStart, LmNfaOnNext, LmNfaOnLast, LmNfaOnEof, Stmt, Subst, + NfaWrapAction, NfaWrapConds + }; + + InlineItem( const InputLoc &loc, std::string data, Type type ) : + loc(loc), data(data), nameRef(0), nameTarg(0), children(0), longestMatch(0), longestMatchPart(0), substPos(0), wrappedAction(0), condSpace(0), type(type), prev(0), next(0) { } + + InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) : + loc(loc), nameRef(nameRef), nameTarg(0), children(0), longestMatch(0), longestMatchPart(0), substPos(0), wrappedAction(0), condSpace(0), type(type), prev(0), next(0) { } + + InlineItem( const InputLoc &loc, LongestMatch *longestMatch, + LongestMatchPart *longestMatchPart, Type type ) : loc(loc), + nameRef(0), nameTarg(0), children(0), longestMatch(longestMatch), + longestMatchPart(longestMatchPart), substPos(0), wrappedAction(0), condSpace(0), type(type), prev(0), next(0) { } + + InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) : + loc(loc), nameRef(0), nameTarg(nameTarg), 
children(0), longestMatch(0), longestMatchPart(0), substPos(0), wrappedAction(0), condSpace(0), + type(type), prev(0), next(0) { } + + InlineItem( const InputLoc &loc, Type type ) : + loc(loc), nameRef(0), nameTarg(0), children(0), longestMatch(0), longestMatchPart(0), substPos(0), wrappedAction(0), condSpace(0), type(type), prev(0), next(0) { } + + InlineItem( const InputLoc &loc, Action *wrappedAction, Type type ) + : + loc(loc), nameRef(0), nameTarg(0), children(0), longestMatch(0), + longestMatchPart(0), substPos(0), wrappedAction(wrappedAction), condSpace(0), type(type), prev(0), next(0) + {} + + InlineItem( const InputLoc &loc, CondSpace *condSpace, + const CondKeySet &condKeySet, Type type ) + : + loc(loc), nameRef(0), nameTarg(0), children(0), longestMatch(0), + longestMatchPart(0), substPos(0), wrappedAction(0), condSpace(condSpace), + condKeySet(condKeySet), type(type), prev(0), next(0) + {} + + ~InlineItem(); + + InputLoc loc; + std::string data; + NameRef *nameRef; + NameInst *nameTarg; + InlineList *children; + LongestMatch *longestMatch; + LongestMatchPart *longestMatchPart; + long substPos; + Action *wrappedAction; + CondSpace *condSpace; + CondKeySet condKeySet; + Type type; + + InlineItem *prev, *next; +}; + +/* Normally this would be a typedef, but that would entail including DList from + * ptreetypes, which should be just typedef forwards. 
*/ +struct InlineList : public DList<InlineItem> { }; + +struct InlineBlock +{ + InlineBlock( const InputLoc &loc, InlineList *inlineList ) + : loc(loc), inlineList(inlineList) {} + + ~InlineBlock() + { + inlineList->empty(); + delete inlineList; + } + + InputLoc loc; + InlineList *inlineList; +}; + +#endif diff --git a/libfsm/actloop.cc b/libfsm/actloop.cc new file mode 100644 index 00000000..675e78fa --- /dev/null +++ b/libfsm/actloop.cc @@ -0,0 +1,229 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "actloop.h" +#include "redfsm.h" +#include "gendata.h" + +void ActLoop::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + fromStateActions.value( act ); +} + +void ActLoop::COND_ACTION( RedCondPair *cond ) +{ + int act = 0; + if ( cond->action != 0 ) + act = cond->action->location+1; + condActions.value( act ); +} + +void ActLoop::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + toStateActions.value( act ); +} + +void ActLoop::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->location+1; + eofActions.value( act ); +} + +void ActLoop::NFA_PUSH_ACTION( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->push != 0 ) + act = targ->push->actListId+1; + nfaPushActions.value( act ); +} + +void ActLoop::NFA_POP_TEST( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->popTest != 0 ) + act = targ->popTest->actListId+1; + nfaPopTrans.value( act ); +} + +std::ostream &ActLoop::FROM_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numFromStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t " << CASE( STR( act->actionId ) ) << " {\n"; + ACTION( out, act, IlOpts( 0, false, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &ActLoop::ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numTransRefs > 0 ) { + /* Write the case label, the action and the case break. 
*/ + out << "\t " << CASE( STR( act->actionId ) ) << " {\n"; + ACTION( out, act, IlOpts( 0, false, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &ActLoop::TO_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numToStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t " << CASE( STR( act->actionId ) ) << " {\n"; + ACTION( out, act, IlOpts( 0, false, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + + +std::ostream &ActLoop::EOF_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numEofRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t " << CASE( STR( act->actionId ) ) << " {\n"; + ACTION( out, act, IlOpts( 0, true, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + + + +void ActLoop::FROM_STATE_ACTIONS() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), ARR_REF( fromStateActions ) + "[" + vCS() + "]" ) << ";\n" + " " << nacts << " = " << CAST(UINT()) << DEREF( ARR_REF( actions ), "" + acts.ref() + "" ) << ";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "" + acts.ref() + "" ) << " ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + " " << nacts << " -= 1;\n" + " " << acts << " += 1;\n" + " }\n" + "\n"; + } +} + +void ActLoop::REG_ACTIONS( std::string cond ) +{ + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), ARR_REF( condActions ) + "[" + cond + "]" ) << ";\n" + " " << nacts << " = " << CAST( UINT() ) << DEREF( ARR_REF( actions ), "" + acts.ref() + "" ) << ";\n" + " 
" << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "" + acts.ref() + "" ) << " )\n" + " {\n"; + ACTION_SWITCH() << + " }\n" + " " << nacts << " -= 1;\n" + " " << acts << " += 1;\n" + " }\n" + "\n"; +} + +void ActLoop::TO_STATE_ACTIONS() +{ + if ( redFsm->anyToStateActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), ARR_REF( toStateActions ) + "[" + vCS() + "]" ) << ";\n" + " " << nacts << " = " << CAST(UINT()) << DEREF( ARR_REF( actions ), acts.ref() ) << ";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), acts.ref() ) << " ) {\n"; + TO_STATE_ACTION_SWITCH() << + " }\n" + " " << nacts << " -= 1;\n" + " " << acts << " += 1;\n" + " }\n" + "\n"; + } +} + +void ActLoop::EOF_ACTIONS() +{ + if ( redFsm->anyEofActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), ARR_REF( eofActions ) + "[" + vCS() + "]" ) << ";\n" + " " << nacts << " = " << CAST(UINT()) << DEREF( ARR_REF( actions ), acts.ref() ) << ";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), acts.ref() ) << " ) {\n"; + EOF_ACTION_SWITCH() << + " }\n" + " " << nacts << " -= 1;\n" + " " << acts << " += 1;\n" + " }\n"; + } +} + +void ActLoop::NFA_FROM_STATE_ACTION_EXEC() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), ARR_REF( fromStateActions ) + "[nfa_bp[nfa_len].state]" ) << ";\n" + " " << nacts << " = " << CAST( UINT() ) << DEREF( ARR_REF( actions ), acts.ref() ) << ";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), acts.ref() ) << " ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + " " << nacts << " -= 1;\n" + " " << acts << " += 1;\n" + " }\n" + "\n"; + } +} + diff --git a/libfsm/actloop.h b/libfsm/actloop.h new file mode 100644 index 00000000..238ba72a 
--- /dev/null +++ b/libfsm/actloop.h @@ -0,0 +1,63 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _ACTLOOP_H +#define _ACTLOOP_H + +#include "binary.h" +#include "tables.h" + +struct RedStateAp; +struct RedCondPair; +/* From-state, cond, to-state and EOF table values are the action list's location+1 (0 for none); the generated code walks the actions array, switching on each action id. */ +class ActLoop + : public virtual Tables +{ +public: + ActLoop( const CodeGenArgs &args ) + : + Tables( args ) + {} + + virtual void FROM_STATE_ACTION( RedStateAp *state ); + virtual void COND_ACTION( RedCondPair *cond ); + virtual void TO_STATE_ACTION( RedStateAp *state ); + virtual void EOF_ACTION( RedStateAp *state ); + + virtual void NFA_PUSH_ACTION( RedNfaTarg *targ ); + virtual void NFA_POP_TEST( RedNfaTarg *targ ); + + virtual std::ostream &FROM_STATE_ACTION_SWITCH(); + virtual std::ostream &ACTION_SWITCH(); + virtual std::ostream &TO_STATE_ACTION_SWITCH(); + virtual std::ostream &EOF_ACTION_SWITCH(); + + virtual void FROM_STATE_ACTIONS(); + virtual void REG_ACTIONS( std::string cond ); + virtual void TO_STATE_ACTIONS(); + virtual void EOF_ACTIONS(); + + virtual void NFA_FROM_STATE_ACTION_EXEC(); +}; + + +#endif /* _ACTLOOP_H */ diff --git a/libfsm/allocgen.cc b/libfsm/allocgen.cc new file mode 100644 index 00000000..fee37940 --- /dev/null +++ b/libfsm/allocgen.cc @@ -0,0 +1,138 @@ +/* + * Copyright 2005-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "parsedata.h" +#include "fsmgraph.h" +#include "gendata.h" +#include "inputdata.h" +#include "version.h" + +/* + * Code generators. + */ +#include "bingoto.h" +#include "binbreak.h" +#include "binvar.h" +#include "flatgoto.h" +#include "flatbreak.h" +#include "flatvar.h" +#include "switchgoto.h" +#include "switchbreak.h" +#include "switchvar.h" +#include "gotoloop.h" +#include "gotoexp.h" +#include "ipgoto.h" +#include "asm.h" + +CodeGenData *makeCodeGenAsm( const HostLang *hostLang, const CodeGenArgs &args ) +{ + return new AsmCodeGen( args ); +} + +/* Invoked by the parser when a ragel definition is opened. 
*/ +CodeGenData *makeCodeGen( const HostLang *hostLang, const CodeGenArgs &args ) +{ + FsmGbl *id = args.id; + CodeGenData *codeGen = 0; + BackendFeature feature = hostLang->feature; + if ( args.forceVar ) + feature = VarFeature; + + switch ( args.codeStyle ) { + case GenBinaryLoop: + if ( feature == GotoFeature ) + codeGen = new BinGotoLoop( args ); + else if ( feature == BreakFeature ) + codeGen = new BinBreakLoop( args ); + else + codeGen = new BinVarLoop( args ); + break; + + case GenBinaryExp: + if ( feature == GotoFeature ) + codeGen = new BinGotoExp( args ); + else if ( feature == BreakFeature ) + codeGen = new BinBreakExp( args ); + else + codeGen = new BinVarExp( args ); + break; + + case GenFlatLoop: + if ( feature == GotoFeature ) + codeGen = new FlatGotoLoop( args ); + else if ( feature == BreakFeature ) + codeGen = new FlatBreakLoop( args ); + else + codeGen = new FlatVarLoop( args ); + break; + + case GenFlatExp: + if ( feature == GotoFeature ) + codeGen = new FlatGotoExp( args ); + else if ( feature == BreakFeature ) + codeGen = new FlatBreakExp( args ); + else + codeGen = new FlatVarExp( args ); + break; + case GenSwitchLoop: + if ( feature == GotoFeature ) + codeGen = new SwitchGotoLoop( args ); + else if ( feature == BreakFeature ) + codeGen = new SwitchBreakLoop( args ); + else + codeGen = new SwitchVarLoop( args ); + break; + + case GenSwitchExp: + if ( feature == GotoFeature ) + codeGen = new SwitchGotoExp( args ); + else if ( feature == BreakFeature ) + codeGen = new SwitchBreakExp( args ); + else + codeGen = new SwitchVarExp( args ); + break; + + + case GenGotoLoop: + if ( feature == GotoFeature ) + codeGen = new GotoLoop(args); + else + id->error() << "unsupported lang/style combination" << endp; + break; + case GenGotoExp: + if ( feature == GotoFeature ) + codeGen = new GotoExp(args); + else + id->error() << "unsupported lang/style combination" << endp; + break; + + case GenIpGoto: + if ( feature == GotoFeature ) + codeGen = new 
IpGoto(args); + else + id->error() << "unsupported lang/style combination" << endp; + break; + } + + return codeGen; +} diff --git a/libfsm/asm.cc b/libfsm/asm.cc new file mode 100644 index 00000000..ca9f5d65 --- /dev/null +++ b/libfsm/asm.cc @@ -0,0 +1,2047 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ragel.h" +#include "asm.h" +#include "redfsm.h" +#include "gendata.h" +#include "bstmap.h" +#include "ragel.h" +#include "redfsm.h" +#include "bstmap.h" +#include "gendata.h" +#include "parsedata.h" +#include "inputdata.h" +#include <sstream> + +using std::ostream; +using std::ostringstream; +using std::string; +using std::endl; +using std::istream; +using std::ifstream; +using std::ostream; +using std::ios; +using std::cin; +using std::endl; + +extern int numSplitPartitions; +bool printStatistics = false; + +/* Enables transition logging in the form that score-based state sorting can + * processes. This bit of code is intended to increase locality and reduce + * cache misses. Gains are minimal, 1-2%. */ +// #define LOG_TRANS 1 + +void asmLineDirective( ostream &out, const char *fileName, int line ) +{ + /* Write the preprocessor line info for to the input file. */ + out << "#line " << line << " \""; + for ( const char *pc = fileName; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } + out << '"'; + + out << '\n'; +} + +/* Init code gen with in parameters. */ +AsmCodeGen::AsmCodeGen( const CodeGenArgs &args ) +: + CodeGenData( args ), + nextLmSwitchLabel( 1 ), + stackCS( false ) +{ +} + +void AsmCodeGen::genAnalysis() +{ + /* For directly executable machines there is no required state + * ordering. Choose a depth-first ordering to increase the + * potential for fall-throughs. */ + redFsm->depthFirstOrdering(); + + /* Choose default transitions and make the flat transitions by character class. */ + redFsm->chooseDefaultSpan(); + redFsm->makeFlatClass(); + + /* If any errors have occured in the input file then don't write anything. */ + if ( red->id->errorCount > 0 ) + return; + + redFsm->setInTrans(); + + /* Anlayze Machine will find the final action reference counts, among other + * things. We will use these in reporting the usage of fsm directives in + * action code. 
*/ + red->analyzeMachine(); +} + +/* Write out the fsm name. */ +string AsmCodeGen::FSM_NAME() +{ + return fsmName; +} + +/* Emit the offset of the start state as a decimal integer. */ +string AsmCodeGen::START_STATE_ID() +{ + ostringstream ret; + ret << redFsm->startState->id; + return ret.str(); +}; + +string AsmCodeGen::ACCESS() +{ + ostringstream ret; + if ( red->accessExpr != 0 ) + INLINE_LIST( ret, red->accessExpr, 0, false, false ); + return ret.str(); +} + + +string AsmCodeGen::P() +{ + ostringstream ret; + if ( red->pExpr == 0 ) + ret << "%r12"; + else { + INLINE_LIST( ret, red->pExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::PE() +{ + ostringstream ret; + if ( red->peExpr == 0 ) + ret << "%r13"; + else { + INLINE_LIST( ret, red->peExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::vCS() +{ + ostringstream ret; + if ( red->csExpr == 0 ) { + if ( stackCS ) + ret << "-48(%rbp)"; + else + ret << "%r11"; + } + else { + INLINE_LIST( ret, red->csExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::TOP() +{ + ostringstream ret; + if ( red->topExpr == 0 ) + ret << "-64(%rbp)"; + else { + ret << "("; + INLINE_LIST( ret, red->topExpr, 0, false, false ); + ret << ")"; + } + return ret.str(); +} + +string AsmCodeGen::NFA_STACK() +{ + return string( "-80(%rbp)" ); +} + +string AsmCodeGen::NFA_TOP() +{ + return string( "-88(%rbp)" ); +} + +string AsmCodeGen::NFA_SZ() +{ + return string( "-96(%rbp)" ); +} + +string AsmCodeGen::STACK() +{ + ostringstream ret; + if ( red->stackExpr == 0 ) + ret << "-56(%rbp)"; + else { + ret << "("; + INLINE_LIST( ret, red->stackExpr, 0, false, false ); + ret << ")"; + } + return ret.str(); +} + +string AsmCodeGen::vEOF() +{ + ostringstream ret; + if ( red->eofExpr == 0 ) + ret << "-8(%rbp)"; + else { + INLINE_LIST( ret, red->eofExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::TOKSTART() +{ + ostringstream ret; + if ( red->tokstartExpr == 0 ) + 
ret << "-16(%rbp)"; + else { + INLINE_LIST( ret, red->tokstartExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::TOKEND() +{ + ostringstream ret; + if ( red->tokendExpr == 0 ) + ret << "-24(%rbp)"; + else { + INLINE_LIST( ret, red->tokendExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::ACT() +{ + ostringstream ret; + if ( red->actExpr == 0 ) + ret << "-32(%rbp)"; + else { + INLINE_LIST( ret, red->actExpr, 0, false, false ); + } + return ret.str(); +} + +string AsmCodeGen::NBREAK() +{ + return string("-33(%rbp)"); +} + +string AsmCodeGen::GET_KEY() +{ + ostringstream ret; + if ( red->getKeyExpr != 0 ) { + /* Emit the user supplied method of retrieving the key. */ + ret << "("; + INLINE_LIST( ret, red->getKeyExpr, 0, false, false ); + ret << ")"; + } + else { + /* Expression for retrieving the key, use simple dereference. */ + ret << "(" << P() << ")"; + } + return ret.str(); +} + +string AsmCodeGen::COND_KEY( CondKey key ) +{ + ostringstream ret; + ret << "$" << key.getVal(); + return ret.str(); +} + + +/* Write out a key from the fsm code gen. Depends on wether or not the key is + * signed. */ +string AsmCodeGen::KEY( Key key ) +{ + ostringstream ret; + ret << "$" << key.getVal(); + return ret.str(); +} + +bool AsmCodeGen::isAlphTypeSigned() +{ + return keyOps->isSigned; +} + +void AsmCodeGen::EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ) +{ + /* The parser gives fexec two children. The double brackets are for D + * code. If the inline list is a single word it will get interpreted as a + * C-style cast by the D compiler. 
*/ + + ret << + " subq $1, "; + INLINE_LIST( ret, item->children, targState, inFinish, false ); + ret << + "\n" + " movq "; + INLINE_LIST( ret, item->children, targState, inFinish, false ); + ret << ", " << P() << "\n"; +} + +void AsmCodeGen::LM_SWITCH( ostream &ret, GenInlineItem *item, + int targState, int inFinish, bool csForced ) +{ + long done = nextLmSwitchLabel++; + + ret << + " movq " << ACT() << ", %rax\n"; + + for ( GenInlineList::Iter lma = *item->children; lma.lte(); lma++ ) { + long l = nextLmSwitchLabel++; + + /* Write the case label, the action and the case break. */ + if ( lma->lmId < 0 ) { + } + else { + ret << + " cmpq $" << lma->lmId << ", %rax\n" + " jne " << LABEL( "lm_switch_next", l ) << "\n"; + } + + INLINE_LIST( ret, lma->children, targState, inFinish, csForced ); + + ret << + " jmp " << LABEL( "lm_done", done ) << "\n" + "" << LABEL( "lm_switch_next", l ) << ":\n"; + } + + ret << + "" << LABEL( "lm_done", done ) << ":\n"; +} + +void AsmCodeGen::SET_ACT( ostream &ret, GenInlineItem *item ) +{ + ret << + " movq $" << item->lmId << ", " << ACT() << "\n"; +} + +void AsmCodeGen::SET_TOKEND( ostream &ret, GenInlineItem *item ) +{ + /* Sets tokend, there may be an offset. 
*/ + ret << + " movq " << P() << ", %rax\n"; + + if ( item->offset != 0 ) { + out << + " addq $" << item->offset << ", %rax\n"; + } + + out << + " movq %rax, " << TOKEND() << "\n"; +} + +void AsmCodeGen::GET_TOKEND( ostream &ret, GenInlineItem *item ) +{ + ret << + " movq " << TOKEND() << ", " << "%rax\n"; +} + +void AsmCodeGen::INIT_TOKSTART( ostream &ret, GenInlineItem *item ) +{ + ret << + " movq $0, " << TOKSTART() << "\n"; +} + +void AsmCodeGen::INIT_ACT( ostream &ret, GenInlineItem *item ) +{ + ret << + " movq $0, " << ACT() << "\n"; +} + +void AsmCodeGen::SET_TOKSTART( ostream &ret, GenInlineItem *item ) +{ + ret << + " movq " << P() << ", " << TOKSTART() << "\n"; +} + +void AsmCodeGen::HOST_STMT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + } +} + +void AsmCodeGen::HOST_EXPR( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + } +} + +void AsmCodeGen::HOST_TEXT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + } +} + +void AsmCodeGen::GEN_STMT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + } +} + +void AsmCodeGen::GEN_EXPR( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. 
*/ + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + } +} + +void AsmCodeGen::LM_EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ) +{ + /* The parser gives fexec two children. The double brackets are for D code. + * If the inline list is a single word it will get interpreted as a C-style + * cast by the D compiler. This should be in the D code generator. */ + INLINE_LIST( ret, item->children, targState, inFinish, false ); + + ret << + " movq %rax, " << P() << "\n" + " subq $1, " << P() << "\n"; +} + +void AsmCodeGen::NBREAK( ostream &ret, int targState, bool csForced ) +{ + outLabelUsed = true; + ret << + " addq $1, " << P() << "\n"; + + if ( !csForced ) { + ret << + " movq $" << targState << ", " << vCS() << "\n"; + } + + ret << + " movb $1, " << NBREAK() << "\n" + " jmp " << LABEL( "pop" ) << "\n"; +} + +/* Write out an inline tree structure. Walks the list and possibly calls out + * to virtual functions than handle language specific items in the tree. 
*/ +void AsmCodeGen::INLINE_LIST( ostream &ret, GenInlineList *inlineList, + int targState, bool inFinish, bool csForced ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case GenInlineItem::Text: + ret << item->data; + break; + case GenInlineItem::Goto: + GOTO( ret, item->targState->id, inFinish ); + break; + case GenInlineItem::Call: + CALL( ret, item->targState->id, targState, inFinish ); + break; + case GenInlineItem::Next: + NEXT( ret, item->targState->id, inFinish ); + break; + case GenInlineItem::Ret: + RET( ret, inFinish ); + break; + case GenInlineItem::PChar: + ret << P(); + break; + case GenInlineItem::Char: + ret << GET_KEY(); + break; + case GenInlineItem::Hold: + ret << + " subq $1, " << P() << "\n"; + break; + case GenInlineItem::Exec: + EXEC( ret, item, targState, inFinish ); + break; + case GenInlineItem::Curs: + CURS( ret, inFinish ); + break; + case GenInlineItem::Targs: + TARGS( ret, inFinish, targState ); + break; + case GenInlineItem::Entry: + ret << item->targState->id; + break; + case GenInlineItem::GotoExpr: + GOTO_EXPR( ret, item, inFinish ); + break; + case GenInlineItem::CallExpr: + CALL_EXPR( ret, item, targState, inFinish ); + break; + case GenInlineItem::NextExpr: + NEXT_EXPR( ret, item, inFinish ); + break; + case GenInlineItem::LmSwitch: + LM_SWITCH( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::LmSetActId: + SET_ACT( ret, item ); + break; + case GenInlineItem::LmSetTokEnd: + SET_TOKEND( ret, item ); + break; + case GenInlineItem::LmGetTokEnd: + GET_TOKEND( ret, item ); + break; + case GenInlineItem::LmInitTokStart: + INIT_TOKSTART( ret, item ); + break; + case GenInlineItem::LmInitAct: + INIT_ACT( ret, item ); + break; + case GenInlineItem::LmSetTokStart: + SET_TOKSTART( ret, item ); + break; + case GenInlineItem::Break: + BREAK( ret, targState, csForced ); + break; + /* Stubbed. 
*/ + case GenInlineItem::Ncall: + NCALL( ret, item->targState->id, targState, inFinish ); + break; + case GenInlineItem::NcallExpr: + NCALL_EXPR( ret, item, targState, inFinish ); + break; + case GenInlineItem::Nret: + NRET( ret, inFinish ); + break; + case GenInlineItem::Nbreak: + NBREAK( ret, targState, csForced ); + break; + case GenInlineItem::LmCase: + break; + + case GenInlineItem::LmExec: + LM_EXEC( ret, item, targState, inFinish ); + break; + + case GenInlineItem::LmHold: + ret << + " subq $1, " << P() << "\n"; + break; + case GenInlineItem::NfaClear: + ret << + " movq $0, " << NFA_TOP() << "\n"; + break; + + case GenInlineItem::HostStmt: + HOST_STMT( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::HostExpr: + HOST_EXPR( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::HostText: + HOST_TEXT( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::GenStmt: + GEN_STMT( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::GenExpr: + GEN_EXPR( ret, item, targState, inFinish, csForced ); + break; + /* Handled at the top level. */ + case GenInlineItem::NfaWrapAction: + case GenInlineItem::NfaWrapConds: + break; + } + } +} +/* Write out paths in line directives. Escapes any special characters. */ +string AsmCodeGen::LDIR_PATH( char *path ) +{ + ostringstream ret; + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + ret << "\\\\"; + else + ret << *pc; + } + return ret.str(); +} + +void AsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, + bool inFinish, bool csForced ) +{ + /* Write the preprocessor line info for going into the source file. */ + asmLineDirective( ret, action->loc.fileName, action->loc.line ); + + /* Write the block and close it off. 
*/ + INLINE_LIST( ret, action->inlineList, targState, inFinish, csForced ); +} + +void AsmCodeGen::CONDITION( ostream &ret, GenAction *condition ) +{ + ret << "\n"; + asmLineDirective( ret, condition->loc.fileName, condition->loc.line ); + INLINE_LIST( ret, condition->inlineList, 0, false, false ); +} + +bool singleItem( GenAction *action, GenInlineItem::Type type ) +{ + return action->inlineList->length() == 1 && + action->inlineList->head->type == type; +} + +void AsmCodeGen::NFA_CONDITION( ostream &ret, GenAction *condition, bool last ) +{ + if ( singleItem( condition, GenInlineItem::NfaWrapAction ) ) + { + GenAction *action = condition->inlineList->head->wrappedAction; + ACTION( out, action, 0, false, false ); + } + else if ( singleItem( condition, GenInlineItem::NfaWrapConds ) ) + { + GenCondSpace *condSpace = condition->inlineList->head->condSpace; + const CondKeySet &condKeySet = condition->inlineList->head->condKeySet; + + out << " movq $0, %r9\n"; + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << + " pushq %r9\n"; + + CONDITION( out, *csi ); + out << + "\n" + " test %eax, %eax\n" + " setne %cl\n" + " movsbq %cl, %rcx\n" + " salq $" << csi.pos() << ", %rcx\n" + " popq %r9\n" + " addq %rcx, %r9\n"; + } + + for ( int c = 0; c < condKeySet.length(); c++ ) { + CondKey key = condKeySet[c]; + out << + " cmpq " << COND_KEY( key ) << ", %r9\n" + " je 102f\n"; + } + + out << + " jmp " << LABEL( "pop_fail" ) << "\n" + "102:\n"; + } + else { + CONDITION( ret, condition ); + out << + " test %eax, %eax\n" + " jz " << LABEL( "pop_fail" ) << "\n"; + } +} + +string AsmCodeGen::ERROR_STATE() +{ + ostringstream ret; + if ( redFsm->errState != 0 ) + ret << redFsm->errState->id; + else + ret << "-1"; + return ret.str(); +} + +string AsmCodeGen::FIRST_FINAL_STATE() +{ + ostringstream ret; + if ( redFsm->firstFinState != 0 ) + ret << redFsm->firstFinState->id; + else + ret << redFsm->nextStateId; + return ret.str(); +} + +void 
AsmCodeGen::writeInit() +{ + if ( !noCS ) { + /* Don't use vCS here. vCS may assumes CS needs to be on the stack. + * Just use the interface register. */ + out << + " movq $" << redFsm->startState->id << ", %r11\n"; + } + + if ( redFsm->anyNfaStates() ) { + out << + " movq $0, " << NFA_TOP() << "\n"; + } + + /* If there are any calls, then the stack top needs initialization. */ + if ( redFsm->anyActionCalls() || redFsm->anyActionRets() ) { + out << + " movq $0, " << TOP() << "\n"; + } + + if ( red->hasLongestMatch ) { + out << + " movq $0, " << TOKSTART() << "\n" + " movq $0, " << TOKEND() << "\n" + " movq $0, " << ACT() << "\n"; + } +} + +string AsmCodeGen::DATA_PREFIX() +{ + if ( !noPrefix ) + return FSM_NAME() + "_"; + return ""; +} + +/* Emit the alphabet data type. */ +string AsmCodeGen::ALPH_TYPE() +{ + string ret = alphType->data1; + if ( alphType->data2 != 0 ) { + ret += " "; + ret += + alphType->data2; + } + return ret; +} + +void AsmCodeGen::STATIC_CONST_INT( const string &name, const string &value ) +{ + out << + " .align 8\n" + " .type " << name << ", @object\n" + " .size " << name << ", 8\n" << + name << ":\n" + " .long " << value << "\n"; +} + +void AsmCodeGen::STATE_IDS() +{ + if ( redFsm->startState != 0 ) + STATIC_CONST_INT( START(), START_STATE_ID() ); + + if ( !noFinal ) + STATIC_CONST_INT( FIRST_FINAL(), FIRST_FINAL_STATE() ); + + if ( !noError ) + STATIC_CONST_INT( ERROR(), ERROR_STATE() ); + + out << "\n"; + + if ( red->entryPointNames.length() > 0 ) { + for ( EntryNameVect::Iter en = red->entryPointNames; en.lte(); en++ ) { + ostringstream ret; + ret << redFsm->startState->id; + + STATIC_CONST_INT( string( DATA_PREFIX() + "en_" + *en ), + ret.str() ); + } + out << "\n"; + } +} + +void AsmCodeGen::writeStart() +{ + out << START_STATE_ID(); +} + +void AsmCodeGen::writeFirstFinal() +{ + out << FIRST_FINAL_STATE(); +} + +void AsmCodeGen::writeError() +{ + out << ERROR_STATE(); +} + +string AsmCodeGen::PTR_CONST() +{ + return "const "; +} + 
+string AsmCodeGen::PTR_CONST_END() +{ + return ""; +} + +std::ostream &AsmCodeGen::OPEN_ARRAY( string type, string name ) +{ + out << "static const " << type << " " << name << "[] = {\n"; + return out; +} + +std::ostream &AsmCodeGen::CLOSE_ARRAY() +{ + return out << "};\n"; +} + +std::ostream &AsmCodeGen::STATIC_VAR( string type, string name ) +{ + out << "static const " << type << " " << name; + return out; +} + +string AsmCodeGen::UINT( ) +{ + return "unsigned int"; +} + +string AsmCodeGen::ARR_OFF( string ptr, string offset ) +{ + return ptr + " + " + offset; +} + +string AsmCodeGen::CAST( string type ) +{ + return "(" + type + ")"; +} + +string AsmCodeGen::NULL_ITEM() +{ + return "0"; +} + +string AsmCodeGen::POINTER() +{ + return " *"; +} + +std::ostream &AsmCodeGen::SWITCH_DEFAULT() +{ + return out; +} + +string AsmCodeGen::CTRL_FLOW() +{ + return ""; +} + +void AsmCodeGen::writeExports() +{ + if ( red->exportList.length() > 0 ) { + for ( ExportList::Iter ex = red->exportList; ex.lte(); ex++ ) { + out << "#define " << DATA_PREFIX() << "ex_" << ex->name << " " << + KEY(ex->key) << "\n"; + } + out << "\n"; + } +} + +string AsmCodeGen::LABEL( const char *type, long i ) +{ + std::stringstream s; + s << ".L" << red->machineId << "_" << type << "_" << i; + return s.str(); +} + +string AsmCodeGen::LABEL( const char *name ) +{ + std::stringstream s; + s << ".L" << red->machineId << "_" << name; + return s.str(); +} + +void AsmCodeGen::emitSingleIfElseIf( RedStateAp *state ) +{ + /* Load up the singles. */ + int numSingles = state->outSingle.length(); + RedTransEl *data = state->outSingle.data; + + /* Write out the single indices. 
*/ + for ( int j = 0; j < numSingles; j++ ) { + out << + " cmpb " << KEY( data[j].lowKey ) << ", %r10b\n" + " je " << TRANS_GOTO_TARG( data[j].value ) << "\n"; + } +} + +void AsmCodeGen::emitSingleJumpTable( RedStateAp *state, string def ) +{ + int numSingles = state->outSingle.length(); + RedTransEl *data = state->outSingle.data; + + long long low = data[0].lowKey.getVal(); + long long high = data[numSingles-1].lowKey.getVal(); + + if ( def.size() == 0 ) + def = LABEL( "sjf", state->id ); + + out << + " movzbq %r10b, %rax\n" + " subq $" << low << ", %rax\n" + " cmpq $" << (high - low) << ", %rax\n" + " ja " << def << "\n" + " leaq " << LABEL( "sjt", state->id ) << "(%rip), %rcx\n" + " movslq (%rcx,%rax,4), %rdx\n" + " addq %rcx, %rdx\n" + " jmp *%rdx\n" + " .section .rodata\n" + " .align 4\n" + << LABEL( "sjt", state->id ) << ":\n"; + + for ( long long j = 0; j < numSingles; j++ ) { + /* Fill in gap between prev and this. */ + if ( j > 0 ) { + long long span = keyOps->span( data[j-1].lowKey, data[j].lowKey ) - 2; + for ( long long k = 0; k < span; k++ ) { + out << " .long " << def << " - " << + LABEL( "sjt", state->id ) << "\n"; + } + } + + out << " .long " << TRANS_GOTO_TARG( data[j].value ) << " - " << + LABEL( "sjt", state->id ) << "\n"; + } + + out << + " .text\n" + "" << LABEL( "sjf", state->id ) << ":\n"; +} + + +void AsmCodeGen::emitRangeBSearch( RedStateAp *state, int low, int high ) +{ + static int nl = 1; + + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; + RedTransEl *data = state->outRange.data; + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet. 
*/ + bool limitLow = keyOps->eq( data[mid].lowKey, keyOps->minKey ); + bool limitHigh = keyOps->eq( data[mid].highKey, keyOps->maxKey ); + +// string nf = TRANS_GOTO_TARG( state->defTrans ); + + /* For some reason the hop is faster and results in smaller code. Not sure + * why. */ + string nf = LABEL( "nf", state->id ); + + if ( anyLower && anyHigher ) { + int l1 = nl++; + string targ = TRANS_GOTO_TARG( data[mid].value ); + + /* Can go lower and higher than mid. */ + out << + " cmpb " << KEY( data[mid].lowKey ) << ", %r10b\n" + " jge " << LABEL( "nl", l1 ) << "\n"; + + + emitRangeBSearch( state, low, mid-1 ); + + out << + LABEL( "nl", l1 ) << ":\n"; + + if ( !keyOps->eq( data[mid].lowKey, data[mid].highKey ) ) { + out << + " cmpb " << KEY ( data[mid].highKey ) << ", %r10b\n"; + } + + out << + " jle " << targ << "\n"; + + emitRangeBSearch( state, mid+1, high ); + } + else if ( anyLower && !anyHigher ) { + + string targ; + if ( limitHigh ) + targ = TRANS_GOTO_TARG( data[mid].value ); + else + targ = LABEL( "nl", nl++ ); + + /* Can go lower than mid but not higher. */ + + out << + " cmpb " << KEY( data[mid].lowKey ) << ", %r10b\n" + " jge " << targ << "\n"; + + emitRangeBSearch( state, low, mid-1 ); + + /* If the higher is the highest in the alphabet then there is no sense + * testing it. */ + if ( !limitHigh ) { + + out << + targ << ":\n"; + + if ( ! keyOps->eq( data[mid].lowKey, data[mid].highKey ) ) { + out << + " cmpb " << KEY ( data[mid].highKey ) << ", %r10b\n"; + } + + out << + " jg " << nf << "\n"; + + TRANS_GOTO( data[mid].value ); + } + } + else if ( !anyLower && anyHigher ) { + string targ; + if ( limitLow ) + targ = TRANS_GOTO_TARG( data[mid].value ); + else + targ = LABEL( "nl", nl++ ); + + /* Can go higher than mid but not lower. 
*/ + + out << + " cmpb " << KEY( data[mid].highKey ) << ", %r10b\n" + " jle " << targ << "\n"; + + emitRangeBSearch( state, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( !limitLow ) { + + out << + targ << ":\n"; + + if ( !keyOps->eq( data[mid].lowKey, data[mid].highKey ) ) { + out << + " cmpb " << KEY( data[mid].lowKey ) << ", %r10b\n"; + } + + out << + " jl " << nf << "\n"; + + TRANS_GOTO( data[mid].value ); + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. */ + if ( !limitLow && !limitHigh ) { + + if ( !keyOps->eq( data[mid].lowKey, data[mid].highKey ) ) { + out << + " cmpb " << KEY( data[mid].lowKey ) << ", %r10b\n" + " jl " << nf << "\n" + " cmpb " << KEY( data[mid].highKey ) << ", %r10b\n" + " jg " << nf << "\n"; + } + else { + out << + " cmpb " << KEY( data[mid].lowKey ) << ", %r10b\n" + " jne " << nf << "\n"; + } + + TRANS_GOTO( data[mid].value ); + } + else if ( limitLow && !limitHigh ) { + + out << + " cmpb " << KEY( data[mid].highKey ) << ", %r10b\n" + " jg " << nf << "\n"; + + TRANS_GOTO( data[mid].value ); + } + else if ( !limitLow && limitHigh ) { + + out << + " cmpb " << KEY( data[mid].lowKey ) << ", %r10b\n" + " jl " << nf << "\n"; + + TRANS_GOTO( data[mid].value ); + } + else { + /* Both high and low are at the limit. No tests to do. 
*/ + TRANS_GOTO( data[mid].value ); + } + } +} + +void AsmCodeGen::emitCharClassIfElseIf( RedStateAp *st ) +{ + long long span = st->high - st->low + 1; + for ( long long pos = 0; pos < span; pos++ ) { + out << + " cmpb " << KEY( st->low + pos ) << ", %r10b\n" + " je " << TRANS_GOTO_TARG( st->transList[pos] ) << "\n"; + } +} + +void AsmCodeGen::emitCharClassJumpTable( RedStateAp *st, string def ) +{ + long long low = st->low; + long long high = st->high; + + if ( def.size() == 0 ) + def = LABEL( "ccf", st->id ); + + out << + " movzbq %r10b, %rax\n" + " subq $" << low << ", %rax\n" + " cmpq $" << (high - low) << ", %rax\n" + " ja " << def << "\n" + " leaq " << LABEL( "cct", st->id ) << "(%rip), %rcx\n" + " movslq (%rcx,%rax,4), %rdx\n" + " addq %rcx, %rdx\n" + " jmp *%rdx\n" + " .section .rodata\n" + " .align 4\n" + << LABEL( "cct", st->id ) << ":\n"; + + long long span = st->high - st->low + 1; + for ( long long pos = 0; pos < span; pos++ ) { + out << " .long " << TRANS_GOTO_TARG( st->transList[pos] ) << " - " << + LABEL( "cct", st->id ) << "\n"; + } + + out << + " .text\n" + "" << LABEL( "ccf", st->id ) << ":\n"; +} + +void AsmCodeGen::NFA_PUSH( RedStateAp *st ) +{ + if ( st->nfaTargs != 0 && st->nfaTargs->length() > 0 ) { + if ( red->nfaPrePushExpr != 0 ) { + out << " movq $" << st->nfaTargs->length() << ", %rdi\n"; + INLINE_LIST( out, red->nfaPrePushExpr->inlineList, 0, false, false ); + } + + for ( RedNfaTargs::Iter t = *st->nfaTargs; t.lte(); t++ ) { + out << + " movq " << NFA_STACK() << ", %rax\n" + " movq " << NFA_TOP() << ", %rcx\n" + " imulq $24, %rcx\n" + " movq $" << t->state->id << ", 0(%rax,%rcx,)\n" + " movq " << P() << ", 8(%rax,%rcx,)\n"; + + out << + " # pop action id " << t->id << "\n" + " movq $" << t->id << ", 16(%rax,%rcx,)\n"; + + if ( t->push ) { + for ( GenActionTable::Iter item = t->push->key; item.lte(); item++ ) { + ACTION( out, item->value, st->id, false, + t->push->anyNextStmt() ); + out << "\n"; + } + } + + out << + " movq " << 
NFA_TOP() << ", %rcx\n" + " addq $1, %rcx\n" + " movq %rcx, " << NFA_TOP() << "\n"; + } + } +} + +void AsmCodeGen::STATE_GOTOS() +{ + bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Writing code above state gotos. */ + IN_TRANS_ACTIONS( st ); + + if ( st->labelNeeded ) + out << LABEL( "st", st->id ) << ":\n"; + + + /* need to do this if the transition is an eof transition, or if the action + * contains fexec. Otherwise, no need. */ + if ( eof ) { + out << + " cmpq " << P() << ", " << vEOF() << "\n"; + + if ( st->isFinal ) + out << " je " << LABEL( "out", st->id ) << "\n"; + else + out << " je " << LABEL( "pop", st->id ) << "\n"; + } + + if ( st->toStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) { + ACTION( out, item->value, st->id, false, + st->toStateAction->anyNextStmt() ); + out << "\n"; + } + } + + if ( st == redFsm->errState ) { + out << LABEL( "en", st->id ) << ":\n"; + + /* Break out here. */ + outLabelUsed = true; + + out << + " movq $" << st->id << ", " << vCS() << "\n" + " jmp " << LABEL( "pop" ) << "\n"; + } + else { + /* Advance and test buffer pos. */ + if ( st->labelNeeded ) { + out << + " addq $1, " << P() << "\n"; + + } + + /* This is the entry label for starting a run. */ + out << LABEL( "en", st->id ) << ":\n"; + + if ( !noEnd ) { + if ( eof ) { + out << + " cmpq " << P() << ", " << PE() << "\n" + " jne " << LABEL( "nope", st->id ) << "\n" << + " cmpq " << P() << ", " << vEOF() << "\n" + " jne " << LABEL( "out", st->id ) << "\n" << + LABEL( "nope", st->id ) << ":\n"; + } + else { + out << + " cmpq " << P() << ", " << PE() << "\n" + " je " << LABEL( "out", st->id ) << "\n"; + } + } + + NFA_PUSH( st ); + + if ( st->fromStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. 
*/ + for ( GenActionTable::Iter item = st->fromStateAction->key; + item.lte(); item++ ) + { + ACTION( out, item->value, st->id, false, + st->fromStateAction->anyNextStmt() ); + out << "\n"; + } + } + + if ( !noEnd && eof ) { + out << + " cmpq " << P() << ", " << vEOF() << "\n" + " jne " << LABEL( "neofd", st->id ) << "\n"; + + if ( st->eofTrans != 0 ) + TRANS_GOTO( st->eofTrans ); + else { + if ( st->isFinal || !redFsm->anyNfaStates() ) + out << "jmp " << LABEL( "out", st->id ) << "\n"; + else + out << "jmp " << LABEL( "pop", st->id ) << "\n"; + } + + out << + " jmp " << LABEL( "deofd", st->id ) << "\n"; + + out << LABEL( "neofd", st->id ) << ":\n"; + } + + /* Record the prev state if necessary. */ + if ( st->anyRegCurStateRef() ) { + out << + " movq $" << st->id << ", -72(%rbp)\n"; + } + + +#ifdef LOG_TRANS + out << + " movzbl (" << P() << "), %r10d\n" + " movq $" << machineId << ", %rdi\n" + " movq $" << st->id << ", %rsi\n" + " movslq %r10d, %rdx\n" + " call " << LABEL( "log_trans" ) << "\n" + ; +#endif + + /* Load *p. */ + if ( st->transList != 0 ) { + long lowKey = redFsm->lowKey.getVal(); + long highKey = redFsm->highKey.getVal(); + + out << + " movzbl (" << P() << "), %r10d\n" + " cmpl $" << lowKey << ", %r10d\n" + " jl " << LABEL( "nf", st->id ) << "\n" + " cmpl $" << highKey << ", %r10d\n" + " jg " << LABEL( "nf", st->id ) << "\n" + " subl " << KEY( lowKey ) << ", %r10d\n" + " leaq " << LABEL( "char_class" ) << "(%rip), %rcx\n" + " movslq %r10d, %rax\n" + " movb (%rcx, %rax), %r10b\n" + ; + + + long len = ( st->high - st->low + 1 ); + + if ( len < 8 ) + emitCharClassIfElseIf( st ); + else { + string def; + if ( st->outRange.length() == 0 ) + def = TRANS_GOTO_TARG( st->defTrans ); + emitCharClassJumpTable( st, def ); + } + } + + /* Write the default transition. 
*/ + out << LABEL( "nf", st->id ) << ":\n"; + TRANS_GOTO( st->defTrans ); + + if ( !noEnd && eof ) { + out << LABEL( "deofd", st->id) << ":\n"; + } + } + } +} + +unsigned int AsmCodeGen::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + return act; +} + +unsigned int AsmCodeGen::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + return act; +} + +unsigned int AsmCodeGen::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->location+1; + return act; +} + +bool AsmCodeGen::useAgainLabel() +{ + return redFsm->anyActionRets() || + redFsm->anyActionByValControl() || + redFsm->anyRegNextStmt(); +} + +void AsmCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << + " jmp " << LABEL( "st", gotoDest ) << "\n"; +} + +void AsmCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + if ( red->prePushExpr != 0 ) + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + + ret << + " movq " << STACK() << ", %rax\n" + " movq " << TOP() << ", %rcx\n" + " movq $" << targState << ", (%rax, %rcx, 8)\n" + " addq $1, %rcx\n" + " movq %rcx, " << TOP() << "\n" + ; + + ret << + " jmp " << LABEL( "st", callDest ) << "\n"; + ; +} + +void AsmCodeGen::CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + if ( red->prePushExpr != 0 ) + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + + ret << + "\n" + " movq "; + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << ", %rdx\n" + "\n" + " movq " << STACK() << ", %rax\n" + " movq " << TOP() << ", %rcx\n" + " movq $" << targState << ", (%rax, %rcx, 8)\n" + " addq $1, %rcx\n" + " movq %rcx, " << TOP() << "\n" + " movq %rdx, " << vCS() << "\n" + ; + + ret << + " jmp " << LABEL( "again" ) << "\n"; +} + +void 
AsmCodeGen::RET( ostream &ret, bool inFinish ) +{ + ret << + " movq " << STACK() << ", %rax\n" + " movq " << TOP() << ", %rcx\n" + " subq $1, %rcx\n" + " movq (%rax, %rcx, 8), %rax\n" + " movq %rax, " << vCS() << "\n" + " movq %rcx, " << TOP() << "\n"; + + if ( red->postPopExpr != 0 ) + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + + ret << + " jmp " << LABEL("again") << "\n"; +} + +void AsmCodeGen::GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << " movq "; + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << ", " << vCS() << "\n"; + + ret << + " jmp " << LABEL("again") << "\n"; +} + +void AsmCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << + " movq $" << nextDest << ", " << vCS() << "\n"; +} + +void AsmCodeGen::NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << " movq "; + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << ", " << vCS() << "\n"; +} + +void AsmCodeGen::NCALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + if ( red->prePushExpr != 0 ) + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + + ret << + " movq " << STACK() << ", %rax\n" + " movq " << TOP() << ", %rcx\n" + " movq $" << targState << ", (%rax, %rcx, 8)\n" + " addq $1, %rcx\n" + " movq %rcx, " << TOP() << "\n" + " movq $" << callDest << ", " << vCS() << "\n"; +} + +void AsmCodeGen::NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, + int targState, bool inFinish ) +{ + if ( red->prePushExpr != 0 ) + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + + ret << + "\n" + " movq "; + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << ", %rdx\n" + "\n" + " movq " << STACK() << ", %rax\n" + " movq " << TOP() << ", %rcx\n" + " movq $" << targState << ", (%rax, %rcx, 8)\n" + " addq $1, %rcx\n" + " movq %rcx, " << TOP() << "\n" + " movq %rdx, " << vCS() << "\n"; +} + +void AsmCodeGen::NRET( ostream &ret, bool 
inFinish ) +{ + ret << + " movq " << STACK() << ", %rax\n" + " movq " << TOP() << ", %rcx\n" + " subq $1, %rcx\n" + " movq (%rax, %rcx, 8), %rax\n" + " movq %rax, " << vCS() << "\n" + " movq %rcx, " << TOP() << "\n"; + + if ( red->postPopExpr != 0 ) + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); +} + +void AsmCodeGen::CURS( ostream &ret, bool inFinish ) +{ + ret << + " movq -72(%rbp), %rax\n"; +} + +void AsmCodeGen::TARGS( ostream &ret, bool inFinish, int targState ) +{ + ret << + " movq $" << targState << ", %rax\n"; +} + +void AsmCodeGen::BREAK( ostream &ret, int targState, bool csForced ) +{ + /* NOTE(review): unlike the other emitters in this file, which write assembly instructions, this writes C-style text (braces, ++, assignment, goto _out) into the output -- confirm whether this path is expected to be reachable for the asm backend. */ outLabelUsed = true; + ret << "{" << P() << "++; "; + if ( !csForced ) + ret << vCS() << " = " << targState << "; "; + ret << CTRL_FLOW() << "goto _out;}"; +} + +bool AsmCodeGen::IN_TRANS_ACTIONS( RedStateAp *state ) +{ + bool anyWritten = false; + + /* Emit the condition-test entry points (ctr labels) for the conditional transitions that go to this state. Each one evaluates the transition's conditions and jumps to the selected target. */ + for ( int it = 0; it < state->numInCondTests; it++ ) { + /* Write the label for the transition so it can be jumped to. */ + RedTransAp *trans = state->inCondTests[it]; + out << LABEL( "ctr", trans->id ) << ":\n"; + + if ( trans->condSpace->condSet.length() == 1 ) { + RedCondPair *tp, *fp; + if ( trans->numConds() == 1 ) { + /* The single condition is either false or true, errCond is the + * opposite. */ + if ( trans->outCondKey(0) == 0 ) { + fp = trans->outCond(0); + tp = trans->errCond(); + } + else { + tp = trans->outCond(0); + fp = trans->errCond(); + } + } + else { + /* Full list, goes false, then true. 
*/ + fp = trans->outCond(0); + tp = trans->outCond(1); + } + + GenCondSet::Iter csi = trans->condSpace->condSet; + CONDITION( out, *csi ); + + out << + " test %eax, %eax\n" + " je " << TRANS_GOTO_TARG( fp ) << "\n" + " jmp " << TRANS_GOTO_TARG( tp ) << "\n"; + } + else { + out << " movq $0, %r9\n"; + + for ( GenCondSet::Iter csi = trans->condSpace->condSet; csi.lte(); csi++ ) { + out << + " pushq %r9\n"; + + CONDITION( out, *csi ); + out << + "\n" + " test %eax, %eax\n" + " setne %cl\n" + " movsbq %cl, %rcx\n" + " salq $" << csi.pos() << ", %rcx\n" + " popq %r9\n" + " addq %rcx, %r9\n"; + } + + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + RedCondPair *pair = trans->outCond( c ); + out << + " cmpq " << COND_KEY( key ) << ", %r9\n" + " je " << TRANS_GOTO_TARG( pair ) << "\n"; + + } + + RedCondPair *err = trans->errCond(); + if ( err != 0 ) { + out << + " jmp " << TRANS_GOTO_TARG( err ) << "\n"; + } + } + } + + /* Emit any transitions that have actions and that go to this state. */ + for ( int it = 0; it < state->numInConds; it++ ) { + RedCondPair *pair = state->inConds[it]; + if ( pair->action != 0 /* && pair->labelNeeded */ ) { + /* Remember that we wrote an action so we know to write the + * line directive for going back to the output. */ + anyWritten = true; + + /* Write the label for the transition so it can be jumped to. */ + out << LABEL( "tr", pair->id ) << ":\n"; + + /* If the action contains a next, then we must preload the current + * state since the action may or may not set it. */ + if ( pair->action->anyNextStmt() ) { + out << + " movq $" << pair->targ->id << ", " << vCS() << "\n"; + } + + if ( redFsm->anyRegNbreak() ) { + out << + " movb $0, " << NBREAK() << "\n"; + } + + /* Write each action in the list. 
*/ + for ( GenActionTable::Iter item = pair->action->key; item.lte(); item++ ) { + ACTION( out, item->value, pair->targ->id, false, + pair->action->anyNextStmt() ); + out << "\n"; + } + + if ( redFsm->anyRegNbreak() ) { + out << + " cmpb $0, " << NBREAK() << "\n" + " jne " << LABEL( "pop" ) << "\n"; + outLabelUsed = true; + } + + + /* If the action contains a next then we need to reload, otherwise + * jump directly to the target state. */ + if ( pair->action->anyNextStmt() ) + out << " jmp " << LABEL( "again" ) << "\n"; + else + out << " jmp " << LABEL( "st", pair->targ->id ) << "\n"; + } + } + + return anyWritten; +} + +std::string AsmCodeGen::TRANS_GOTO_TARG( RedCondPair *pair ) +{ + std::stringstream s; + if ( pair->action != 0 ) { + /* Go to the transition which will go to the state. */ + s << LABEL( "tr", pair->id ); + } + else { + /* Go directly to the target state. */ + s << LABEL( "st", pair->targ->id ); + } + return s.str(); +} + +std::string AsmCodeGen::TRANS_GOTO_TARG( RedTransAp *trans ) +{ + if ( trans->condSpace != 0 ) { + /* Need to jump to the trans since there are conditions. */ + return LABEL( "ctr", trans->id ); + } + else { + return TRANS_GOTO_TARG( &trans->p ); + } +} + +/* Emit the goto to take for a given transition. */ +std::ostream &AsmCodeGen::TRANS_GOTO( RedTransAp *trans ) +{ + out << " jmp " << TRANS_GOTO_TARG( trans ) << "\n"; + return out; +} + +std::ostream &AsmCodeGen::EXIT_STATES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + out << + LABEL( "out", st->id ) << ":\n" + " movq $" << st->id << ", " << vCS() << "\n" + " jmp " << LABEL( "out" ) << "\n"; + + out << + LABEL( "pop", st->id ) << ":\n" + " movq $" << st->id << ", " << vCS() << "\n" + " jmp " << LABEL( "pop" ) << "\n"; + } + return out; +} + +std::ostream &AsmCodeGen::AGAIN_CASES() +{ + /* Jump into the machine based on the current state. 
*/ + out << + " leaq " << LABEL( "again_jmp" ) << "(%rip), %rcx\n"; + + if ( stackCS ) { + out << + " movq " << vCS() << ", %r11\n"; + } + + out << + " movq (%rcx,%r11,8), %rcx\n" + " jmp *%rcx\n" + " .section .rodata\n" + " .align 8\n" + << LABEL( "again_jmp" ) << ":\n"; + + for ( int stId = 0; stId < redFsm->stateList.length(); stId++ ) { + out << + " .quad " << LABEL( "st", stId ) << "\n"; + } + + out << + " .text\n"; + + return out; +} + +std::ostream &AsmCodeGen::ENTRY_CASES() +{ + out << + " movq (%rcx,%r11,8), %rcx\n" + " jmp *%rcx\n" + " .section .rodata\n" + " .align 8\n" + << LABEL( "entry_jmp" ) << ":\n"; + + for ( int stId = 0; stId < redFsm->stateList.length(); stId++ ) { + out << + " .quad " << LABEL( "en", stId ) << "\n"; + } + + out << + " .text\n"; + return out; +} + + +std::ostream &AsmCodeGen::FINISH_CASES() +{ + /* The current state is in %rax. */ + /*long done = */ nextLmSwitchLabel++; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + out << + " cmpq $" << st->id << ", %rax\n" + " jne " << LABEL( "fc", st->id ) << "\n"; + + if ( st->fromStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + for ( GenActionTable::Iter item = st->fromStateAction->key; + item.lte(); item++ ) + { + ACTION( out, item->value, st->id, false, + st->fromStateAction->anyNextStmt() ); + out << "\n"; + } + } + + out << + " jmp " << TRANS_GOTO_TARG( st->eofTrans ) << "\n" << + LABEL( "fc", st->id ) << ":\n"; + } + } + + return out; +} + +void AsmCodeGen::setLabelsNeeded( GenInlineList *inlineList ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case GenInlineItem::Goto: case GenInlineItem::Call: { + /* Mark the target as needing a label. 
*/ + item->targState->labelNeeded = true; + break; + } + default: break; + } + + if ( item->children != 0 ) + setLabelsNeeded( item->children ); + } +} + +void AsmCodeGen::setLabelsNeeded( RedCondPair *pair ) +{ + /* If there is no action with a next statement, then the label will be + * needed. */ + if ( pair->action == 0 || !pair->action->anyNextStmt() ) + pair->targ->labelNeeded = true; + + /* Need labels for states that have goto or calls in action code + * invoked on characters (ie, not from out action code). */ + if ( pair->action != 0 ) { + /* Loop the actions. */ + for ( GenActionTable::Iter act = pair->action->key; act.lte(); act++ ) { + /* Get the action and walk its tree. */ + setLabelsNeeded( act->value->inlineList ); + } + } +} + +/* Set up labelNeeded flag for each state. */ +void AsmCodeGen::setLabelsNeeded() +{ + /* If we use the _again label, then we need the _again switch, which uses all + * labels. */ + if ( useAgainLabel() ) { + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = true; + } + else { + /* Do not use all labels by default, init all labelNeeded vars to false. 
*/ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = false; + + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + if ( trans->condSpace == 0 ) + setLabelsNeeded( &trans->p ); + } + + for ( CondApSet::Iter cond = redFsm->condSet; cond.lte(); cond++ ) + setLabelsNeeded( &cond->p ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofAction != 0 ) { + for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) + setLabelsNeeded( item->value->inlineList ); + } + } + } + + if ( !noEnd ) { + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->outNeeded = st->labelNeeded; + } +} + +void AsmCodeGen::writeData() +{ + STATE_IDS(); + + long long maxSpan = keyOps->span( redFsm->lowKey, redFsm->highKey ); + + out << + " .type " << LABEL( "char_class" ) << ", @object\n" << + LABEL( "char_class" ) << ":\n"; + + for ( long long pos = 0; pos < maxSpan; pos++ ) { + out << + " .byte " << redFsm->classMap[pos] << "\n"; + } + +#ifdef LOG_TRANS + out << + LABEL( "fmt_log_trans" ) << ":\n" + " .string \"%i %i %i\\n\"\n"; +#endif +} + +void AsmCodeGen::setNfaIds() +{ + long nextId = 1; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) { + targ->id = nextId; + nextId += 1; + } + } + } +} + +void AsmCodeGen::writeExec() +{ + /* Must set labels immediately before writing because we may depend on the + * noend write option. */ + setLabelsNeeded(); + testEofUsed = false; + outLabelUsed = false; + + setNfaIds(); + + /* If there are eof actions then we need to run code after exporting the + * final state to vCS. Since the interface register (%r11) is caller-save, we need + * it to live on the stack. */ + stackCS = redFsm->anyEofActivity(); + + /* + * This code needs 88 bytes of stack (offset 0 from %rbp). NOTE(review): the slot list that follows goes down to -96(%rbp), which would be 96 bytes -- confirm the size.
+ * + * cv : %r9 -- caller-save, used internally, condition char, undefined in + * conditions and actions, can use + * + * pc : %r10b -- caller-save, used internally, undefined in conditions + * actions, can use + * + * cs : %r11 -- caller-save, written by write init, read and + * written by exec, undefined in conditions and actions + * + * p : %r12 -- callee-save, interface, persistent + * + * pe : %r13 -- callee-save, interface, persistent + * + * eof: -8(%rbp) + * + * ts: -16(%rbp) + * + * te: -24(%rbp) + * + * act: -32(%rbp) + * + * _nbreak: -40(%rbp) + * + * stackCS: -48(%rbp) + * + * stack: -56(%rbp) + * top: -64(%rbp) + * + * _ps: -72(%rbp) + * + * nfa_stack -80(%rbp) + * nfa_top -88(%rbp) + * nfa_sz -96(%rbp) + */ + + if ( redFsm->anyRegCurStateRef() ) { + out << + " movq $0, -72(%rbp)\n"; + } + + if ( stackCS ) { + /* Only need a persistent cs in the case of eof actions when exiting the + * block. Where CS lives is a matter of performance though, so we should + * only do this if necessary. */ + out << + " movq %r11, " << vCS() << "\n"; + } + + if ( useAgainLabel() ) { + out << + " jmp " << LABEL( "resume" ) << "\n" + << LABEL( "again" ) << ":\n"; + + AGAIN_CASES(); + } + + if ( useAgainLabel() || redFsm->anyNfaStates() ) + out << LABEL( "resume" ) << ":\n"; + + /* Jump into the machine based on the current state. 
*/ + out << + " leaq " << LABEL( "entry_jmp" ) << "(%rip), %rcx\n"; + + if ( stackCS ) { + out << + " movq " << vCS() << ", %r11\n"; + } + + ENTRY_CASES(); + + STATE_GOTOS(); + + EXIT_STATES(); + + out << LABEL( "pop" ) << ":\n"; + + if ( redFsm->anyNfaStates() ) { + out << + " movq " << NFA_TOP() << ", %rcx\n" + " cmpq $0, %rcx\n" + " je " << LABEL( "nfa_stack_empty" ) << "\n" + " movq " << NFA_TOP() << ", %rcx\n" + " subq $1, %rcx\n" + " movq %rcx, " << NFA_TOP() << "\n" + " movq " << NFA_STACK() << ", %rax\n" + " imulq $24, %rcx\n" + " movq 0(%rax,%rcx,), %r11\n" + " movq 8(%rax,%rcx,), " << P() << "\n" + " movq %r11, " << vCS() << "\n" + ; + + if ( redFsm->bAnyNfaPops ) { + out << + " movq %r11, %r14\n" + " movq 16(%rax,%rcx,), %rax\n"; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) { + + /* Write the entry label. */ + out << + " # pop action select\n" + " cmp $" << targ->id << ", %rax\n" + " jne 100f\n"; + + if ( targ->popTest != 0 ) { + /* Write each action in the list of action items. 
*/ + for ( GenActionTable::Iter item = targ->popTest->key; item.lte(); item++ ) + NFA_CONDITION( out, item->value, item.last() ); + } + + out << + " jmp 101f\n" + "100:\n"; + + } + } + } + + out << + "101:\n" + " movq %r14, %r11\n"; + } + + out << + " jmp " << LABEL( "resume" ) << "\n" << + LABEL( "pop_fail" ) << ":\n" + " movq $" << ERROR_STATE() << ", " << vCS() << "\n" + " jmp " << LABEL( "resume" ) << "\n" << + LABEL( "nfa_stack_empty" ) << ":\n"; + } + + if ( stackCS ) { + out << + " movq " << vCS() << ", %r11\n"; + } + + out << + "# WRITE EXEC END\n"; + + out << LABEL( "out" ) << ":\n"; + + if ( stackCS ) { + out << + " movq " << vCS() << ", %r11\n"; + } + +#ifdef LOG_TRANS + out << + " jmp " << LABEL( "skip" ) << "\n" << + LABEL( "log_trans" ) << ":\n" + " movq %rdx, %rcx\n" + " movq %rsi, %rdx\n" + " movq %rdi, %rsi\n" + " movq " << LABEL( "fmt_log_trans" ) << "@GOTPCREL(%rip), %rdi\n" + " movq $0, %rax\n" + " call printf@PLT\n" + " ret\n" << + LABEL( "skip" ) << ":\n" + "\n"; +#endif +} diff --git a/libfsm/asm.h b/libfsm/asm.h new file mode 100644 index 00000000..3b4229d4 --- /dev/null +++ b/libfsm/asm.h @@ -0,0 +1,248 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _ASM_H +#define _ASM_H + +#include <iostream> +#include <string> +#include <iomanip> +#include <stdio.h> + +#include "common.h" +#include "gendata.h" +#include "ragel.h" + +using std::string; +using std::ostream; + +/* Integer array line length. */ +#define IALL_INTEGRAL 8 +#define IALL_STRING 128 + +/* Forwards. */ +struct RedFsmAp; +struct RedStateAp; +struct CodeGenData; +struct GenAction; +struct NameInst; +struct GenInlineItem; +struct GenInlineList; +struct RedAction; +struct LongestMatch; +struct LongestMatchPart; +class AsmCodeGen; +struct RedTransAp; +struct RedStateAp; +struct GenStateCond; + +string itoa( int i ); + +/* + * class AsmCodeGen + */ +class AsmCodeGen : public CodeGenData +{ +public: + AsmCodeGen( const CodeGenArgs &args ); + virtual ~AsmCodeGen() {} + + virtual void writeInit(); + virtual void writeStart(); + virtual void writeFirstFinal(); + virtual void writeError(); + + virtual void statsSummary() {} + virtual void genAnalysis(); + +protected: + string FSM_NAME(); + string START_STATE_ID(); + string KEY( Key key ); + string COND_KEY( CondKey key ); + string LDIR_PATH( char *path ); + virtual void ACTION( ostream &ret, GenAction *action, int targState, + bool inFinish, bool csForced ); + void CONDITION( ostream &ret, GenAction *condition ); + void NFA_CONDITION( ostream &ret, GenAction *condition, bool last ); + string ALPH_TYPE(); + + bool isAlphTypeSigned(); + + string GET_KEY(); + + string P(); + string PE(); + string vEOF(); + string 
NBREAK(); + + string ACCESS(); + string vCS(); + string STACK(); + string TOP(); + string TOKSTART(); + string TOKEND(); + string ACT(); + + string NFA_STACK(); + string NFA_TOP(); + string NFA_SZ(); + + string DATA_PREFIX(); + string PM() { return "_" + DATA_PREFIX() + "partition_map"; } + string C() { return "_" + DATA_PREFIX() + "cond_spaces"; } + string CK() { return "_" + DATA_PREFIX() + "cond_keys"; } + string K() { return "_" + DATA_PREFIX() + "trans_keys"; } + string I() { return "_" + DATA_PREFIX() + "indices"; } + string CO() { return "_" + DATA_PREFIX() + "cond_offsets"; } + string KO() { return "_" + DATA_PREFIX() + "key_offsets"; } + string IO() { return "_" + DATA_PREFIX() + "index_offsets"; } + string CL() { return "_" + DATA_PREFIX() + "cond_lengths"; } + string SL() { return "_" + DATA_PREFIX() + "single_lengths"; } + string RL() { return "_" + DATA_PREFIX() + "range_lengths"; } + string A() { return "_" + DATA_PREFIX() + "actions"; } + string TA() { return "_" + DATA_PREFIX() + "trans_actions"; } + string TT() { return "_" + DATA_PREFIX() + "trans_targs"; } + string TSA() { return "_" + DATA_PREFIX() + "to_state_actions"; } + string FSA() { return "_" + DATA_PREFIX() + "from_state_actions"; } + string EA() { return "_" + DATA_PREFIX() + "eof_actions"; } + string ET() { return "_" + DATA_PREFIX() + "eof_trans"; } + string SP() { return "_" + DATA_PREFIX() + "key_spans"; } + string CSP() { return "_" + DATA_PREFIX() + "cond_key_spans"; } + string START() { return DATA_PREFIX() + "start"; } + string ERROR() { return DATA_PREFIX() + "error"; } + string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; } + string CTXDATA() { return DATA_PREFIX() + "ctxdata"; } + + string LABEL( const char *type, long i ); + string LABEL( const char *name ); + + void INLINE_LIST( ostream &ret, GenInlineList *inlineList, + int targState, bool inFinish, bool csForced ); + void EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ); + void LM_SWITCH( 
ostream &ret, GenInlineItem *item, int targState, + int inFinish, bool csForced ); + void SET_ACT( ostream &ret, GenInlineItem *item ); + void INIT_TOKSTART( ostream &ret, GenInlineItem *item ); + void INIT_ACT( ostream &ret, GenInlineItem *item ); + void SET_TOKSTART( ostream &ret, GenInlineItem *item ); + void SET_TOKEND( ostream &ret, GenInlineItem *item ); + void GET_TOKEND( ostream &ret, GenInlineItem *item ); + void STATIC_CONST_INT( const string &name, const string &val ); + void STATE_IDS(); + + string ERROR_STATE(); + string FIRST_FINAL_STATE(); + + bool outLabelUsed; + bool testEofUsed; + bool againLabelUsed; + long nextLmSwitchLabel; + bool stackCS; + + void NBREAK( ostream &ret, int targState, bool csForced ); + void NCALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void NRET( ostream &ret, bool inFinish ); + + void HOST_STMT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ); + void HOST_EXPR( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ); + void HOST_TEXT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ); + void GEN_STMT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ); + void GEN_EXPR( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ); + +public: + + virtual string NULL_ITEM(); + virtual string POINTER(); + virtual ostream &SWITCH_DEFAULT(); + virtual ostream &OPEN_ARRAY( string type, string name ); + virtual ostream &CLOSE_ARRAY(); + virtual ostream &STATIC_VAR( string type, string name ); + virtual string ARR_OFF( string ptr, string offset ); + virtual string CAST( string type ); + virtual string UINT(); + virtual string PTR_CONST(); + virtual string PTR_CONST_END(); + virtual string CTRL_FLOW(); + + virtual void writeExports(); + + unsigned int TO_STATE_ACTION( RedStateAp 
*state ); + unsigned int FROM_STATE_ACTION( RedStateAp *state ); + unsigned int EOF_ACTION( RedStateAp *state ); + + void COND_TRANSLATE( GenStateCond *stateCond ); + void STATE_CONDS( RedStateAp *state, bool genDefault ); + + std::ostream &EXIT_STATES(); + std::string TRANS_GOTO_TARG( RedTransAp *trans ); + std::string TRANS_GOTO_TARG( RedCondPair *pair ); + std::ostream &TRANS_GOTO( RedTransAp *trans ); + std::ostream &AGAIN_CASES(); + std::ostream &FINISH_CASES(); + std::ostream &ENTRY_CASES(); + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NEXT( ostream &ret, int nextDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void RET( ostream &ret, bool inFinish ); + void CURS( ostream &ret, bool inFinish ); + void TARGS( ostream &ret, bool inFinish, int targState ); + void BREAK( ostream &ret, int targState, bool csForced ); + void LM_EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ); + + virtual void writeData(); + virtual void writeExec(); + + bool useAgainLabel(); + + void NFA_PUSH( RedStateAp *state ); + bool IN_TRANS_ACTIONS( RedStateAp *state ); + void STATE_GOTOS(); + + void emitSingleIfElseIf( RedStateAp *state ); + void emitSingleJumpTable( RedStateAp *state, std::string def ); + void emitRangeBSearch( RedStateAp *state, int low, int high ); + void emitCharClassIfElseIf( RedStateAp *state ); + void emitCharClassJumpTable( RedStateAp *state, std::string def ); + + /* Set up labelNeeded flag for each state. 
*/ + void setLabelsNeeded( RedCondPair *pair ); + void setLabelsNeeded( GenInlineList *inlineList ); + void setLabelsNeeded(); + + void setNfaIds(); + + void genOutputLineDirective( ostream &out ) {} + void genLineDirective( ostream &out, int line, const char *file ) {} +}; + +#endif diff --git a/libfsm/binary.cc b/libfsm/binary.cc new file mode 100644 index 00000000..39b58a47 --- /dev/null +++ b/libfsm/binary.cc @@ -0,0 +1,819 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "binary.h" +#include "redfsm.h" +#include "gendata.h" + +#include <assert.h> + +void Binary::genAnalysis() +{ + redFsm->sortByStateId(); + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Choose the singles. 
*/ + redFsm->moveSelectTransToSingle(); + + if ( redFsm->errState != 0 ) + redFsm->getErrorCond(); + + /* If any errors have occurred in the input file then don't write anything. */ + if ( red->id->errorCount > 0 ) + return; + + /* Analyze Machine will find the final action reference counts, among other + * things. We will use these in reporting the usage of fsm directives in + * action code. */ + red->analyzeMachine(); + + setKeyType(); + + /* Run the analysis pass over the table data. */ + setTableState( TableArray::AnalyzePass ); + tableDataPass(); + + /* Switch the tables over to the code gen mode. */ + setTableState( TableArray::GeneratePass ); +} + + +void Binary::tableDataPass() +{ + if ( type == Loop ) + taActions(); + + taKeyOffsets(); + taSingleLens(); + taRangeLens(); + taIndexOffsets(); + taIndices(); + + taTransCondSpacesWi(); + taTransOffsetsWi(); + taTransLengthsWi(); + + taTransCondSpaces(); + taTransOffsets(); + taTransLengths(); + + taCondTargs(); + taCondActions(); + + taToStateActions(); + taFromStateActions(); + taEofActions(); + taEofConds(); + taEofTrans(); + + taKeys(); + taCondKeys(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); +} + +void Binary::writeData() +{ + if ( type == Loop ) { + /* If there are any transition functions then output the array. If there + * are none, don't bother emitting an empty array that won't be used. 
*/ + if ( redFsm->anyActions() ) + taActions(); + } + + taKeyOffsets(); + taKeys(); + taSingleLens(); + taRangeLens(); + taIndexOffsets(); + + taTransCondSpaces(); + taTransOffsets(); + taTransLengths(); + + taCondKeys(); + taCondTargs(); + taCondActions(); + + if ( redFsm->anyToStateActions() ) + taToStateActions(); + + if ( redFsm->anyFromStateActions() ) + taFromStateActions(); + + if ( redFsm->anyEofActions() ) + taEofActions(); + + taEofConds(); + + if ( redFsm->anyEofTrans() ) + taEofTrans(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); + + STATE_IDS(); +} + + +void Binary::setKeyType() +{ + transKeys.setType( ALPH_TYPE(), alphType->size, alphType->isChar ); + transKeys.isSigned = keyOps->isSigned; +} + +void Binary::setTableState( TableArray::State state ) +{ + for ( ArrayVector::Iter i = arrayVector; i.lte(); i++ ) { + TableArray *tableArray = *i; + tableArray->setState( state ); + } +} + +void Binary::taKeyOffsets() +{ + keyOffsets.start(); + + int curKeyOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + keyOffsets.value( curKeyOffset ); + curKeyOffset += st->outSingle.length() + st->outRange.length()*2; + } + + keyOffsets.finish(); +} + + +void Binary::taSingleLens() +{ + singleLens.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + singleLens.value( st->outSingle.length() ); + + singleLens.finish(); +} + + +void Binary::taRangeLens() +{ + rangeLens.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + rangeLens.value( st->outRange.length() ); + + rangeLens.finish(); +} + +void Binary::taIndexOffsets() +{ + indexOffsets.start(); + + int curIndOffset = 0; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the index offset. */ + indexOffsets.value( curIndOffset ); + + /* Move the index offset ahead. 
*/ + curIndOffset += st->outSingle.length() + st->outRange.length(); + if ( st->defTrans != 0 ) + curIndOffset += 1; + } + + indexOffsets.finish(); +} + +void Binary::taToStateActions() +{ + toStateActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + TO_STATE_ACTION(st); + + toStateActions.finish(); +} + +void Binary::taFromStateActions() +{ + fromStateActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + FROM_STATE_ACTION(st); + + fromStateActions.finish(); +} + +void Binary::taEofActions() +{ + eofActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + EOF_ACTION( st ); + + eofActions.finish(); +} + +void Binary::taEofConds() +{ + /* + * EOF Cond Spaces + */ + eofCondSpaces.start(); + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->outCondSpace != 0 ) + eofCondSpaces.value( st->outCondSpace->condSpaceId ); + else + eofCondSpaces.value( -1 ); + } + eofCondSpaces.finish(); + + /* + * EOF Cond Key Offsets + */ + eofCondKeyOffs.start(); + + int curOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long off = 0; + if ( st->outCondSpace != 0 ) { + off = curOffset; + curOffset += st->outCondKeys.length(); + } + eofCondKeyOffs.value( off ); + } + + eofCondKeyOffs.finish(); + + /* + * EOF Cond Key Lengths. 
+ */ + eofCondKeyLens.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long len = 0; + if ( st->outCondSpace != 0 ) + len = st->outCondKeys.length(); + eofCondKeyLens.value( len ); + } + + eofCondKeyLens.finish(); + + /* + * EOF Cond Keys: the out cond key values of every state that has an out cond space. + */ + eofCondKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->outCondSpace != 0 ) { + for ( int c = 0; c < st->outCondKeys.length(); c++ ) { + CondKey key = st->outCondKeys[c]; + eofCondKeys.value( key.getVal() ); + } + } + } + + eofCondKeys.finish(); +} +/* Emit one entry per state: 0 when the state has no eof transition, otherwise a number that continues counting after all single, range and default transitions. */ +void Binary::taEofTrans() +{ + eofTrans.start(); + + /* Need to compute transition positions: count every single, range and default transition first; eof transitions are numbered after them. */ + int totalTrans = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + totalTrans += st->outSingle.length(); + totalTrans += st->outRange.length(); + if ( st->defTrans != 0 ) + totalTrans += 1; + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long trans = 0; /* stays 0 when the state has no eof transition */ + if ( st->eofTrans != 0 ) { + trans = totalTrans + 1; /* one past the running count, so a real entry is never 0 */ + totalTrans += 1; + } + + eofTrans.value( trans ); + } + + eofTrans.finish(); +} +/* Emit the transition keys state by state: the low key of each single, then the low and high key of each range. */ +void Binary::taKeys() +{ + transKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Loop the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + transKeys.value( stel->lowKey.getVal() ); + } + + /* Loop the ranges: each contributes a low and a high key. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + /* Lower key. */ + transKeys.value( rtel->lowKey.getVal() ); + + /* Upper key. */ + transKeys.value( rtel->highKey.getVal() ); + } + } + + transKeys.finish(); +} +/* Emit the id of each transition state by state: singles, then ranges, then the default transition when the state has one. */ +void Binary::taIndices() +{ + indices.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) + indices.value( stel->value->id ); + + /* Walk the ranges. 
*/ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) + indices.value( rtel->value->id ); + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) + indices.value( st->defTrans->id ); + } + + indices.finish(); +} + +void Binary::taTransCondSpaces() +{ + transCondSpaces.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + } + + transCondSpaces.finish(); +} + +void Binary::taTransOffsets() +{ + transOffsets.start(); + + int curOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + transOffsets.value( curOffset ); + curOffset += trans->numConds(); + } + + /* Walk the ranges. 
*/ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + transOffsets.value( curOffset ); + curOffset += trans->numConds(); + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + transOffsets.value( curOffset ); + curOffset += trans->numConds(); + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + transOffsets.value( curOffset ); + curOffset += trans->numConds(); + } + } + + errCondOffset = curOffset; + + transOffsets.finish(); +} + +void Binary::taTransLengths() +{ + transLengths.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + transLengths.value( trans->numConds() ); + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + transLengths.value( trans->numConds() ); + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + transLengths.value( trans->numConds() ); + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + transLengths.value( trans->numConds() ); + } + } + + transLengths.finish(); +} + +void Binary::taTransCondSpacesWi() +{ + transCondSpacesWi.start(); + + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + /* Cond Space id. 
*/ + if ( trans->condSpace != 0 ) + transCondSpacesWi.value( trans->condSpace->condSpaceId ); + else + transCondSpacesWi.value( -1 ); + } + + transCondSpacesWi.finish(); +} +/* Emit, for each transition in redFsm->transSet, the running offset of its first condition; every transition advances the offset by its numConds(). */ +void Binary::taTransOffsetsWi() +{ + transOffsetsWi.start(); + + int curOffset = 0; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + transOffsetsWi.value( curOffset ); + curOffset += trans->numConds(); + } + + transOffsetsWi.finish(); +} +/* Emit numConds() for each transition in redFsm->transSet. */ +void Binary::taTransLengthsWi() +{ + transLengthsWi.start(); + + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + transLengthsWi.value( trans->numConds() ); + } + + transLengthsWi.finish(); +} +/* Emit the condition keys of every transition state by state: singles, then ranges, then the default transition; eof transitions follow in a second pass over the states. */ +void Binary::taCondKeys() +{ + condKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + condKeys.value( key.getVal() ); + } + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + condKeys.value( key.getVal() ); + } + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + condKeys.value( key.getVal() ); + } + } + } + + /* Add any eof transitions that have not yet been written out above. 
*/ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + condKeys.value( key.getVal() ); + } + } + } + + condKeys.finish(); +} + +void Binary::taCondTargs() +{ + condTargs.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + condTargs.value( cond->targ->id ); + } + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + condTargs.value( cond->targ->id ); + } + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + condTargs.value( cond->targ->id ); + } + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + condTargs.value( cond->targ->id ); + } + } + } + + if ( redFsm->errCond != 0 ) { + RedCondPair *cond = &redFsm->errCond->p; + condTargs.value( cond->targ->id ); + } + + condTargs.finish(); +} + +void Binary::taCondActions() +{ + condActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. 
*/ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + COND_ACTION( cond ); + } + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + COND_ACTION( cond ); + } + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + COND_ACTION( cond ); + } + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + COND_ACTION( cond ); + } + } + } + + if ( redFsm->errCond != 0 ) { + RedCondPair *cond = &redFsm->errCond->p; + COND_ACTION( cond ); + } + + condActions.finish(); +} + +void Binary::taNfaTargs() +{ + nfaTargs.start(); + + /* Offset of zero means no NFA targs, put a filler there. */ + nfaTargs.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaTargs.value( st->nfaTargs->length() ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + nfaTargs.value( targ->state->id ); + } + } + + nfaTargs.finish(); +} + +/* These need to mirror nfa targs. 
*/ +void Binary::taNfaPushActions() +{ + nfaPushActions.start(); + + nfaPushActions.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaPushActions.value( 0 ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_PUSH_ACTION( targ ); + } + } + + nfaPushActions.finish(); +} + +void Binary::taNfaPopTrans() +{ + nfaPopTrans.start(); + + nfaPopTrans.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + + nfaPopTrans.value( 0 ); + + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_POP_TEST( targ ); + } + } + + nfaPopTrans.finish(); +} + +void Binary::taNfaOffsets() +{ + nfaOffsets.start(); + + /* Offset of zero means no NFA targs, real targs start at 1. */ + long offset = 1; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs == 0 ) { + nfaOffsets.value( 0 ); + } + else { + nfaOffsets.value( offset ); + offset += 1 + st->nfaTargs->length(); + } + } + + nfaOffsets.finish(); +} + + +/* Write out the array of actions. */ +std::ostream &Binary::ACTIONS_ARRAY() +{ + out << "\t0, "; + int totalActions = 1; + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. */ + out << act->key.length() << ", "; + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) { + out << item->value->actionId; + if ( ! (act.last() && item.last()) ) + out << ", "; + + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +void Binary::taActions() +{ + actions.start(); + + /* Put "no-action" at the beginning. 
*/ + actions.value( 0 ); + + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. */ + actions.value( act->key.length() ); + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) + actions.value( item->value->actionId ); + } + + actions.finish(); +} + + + + diff --git a/libfsm/binary.h b/libfsm/binary.h new file mode 100644 index 00000000..d947483d --- /dev/null +++ b/libfsm/binary.h @@ -0,0 +1,98 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _C_BINARY_H +#define _C_BINARY_H + +#include <iostream> +#include "codegen.h" +#include "tables.h" + +/* Forwards. 
*/ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; + +class Binary + : public virtual Tables +{ +protected: + enum Type { + Loop = 1, Exp + }; + +public: + Binary( const CodeGenArgs &args, Type type ) + : + Tables( args ), + type(type) + {} + +protected: + Type type; + + std::ostream &COND_KEYS_v1(); + std::ostream &COND_SPACES_v1(); + std::ostream &INDICES(); + std::ostream &INDEX_OFFSETS(); + std::ostream &SINGLE_LENS(); + std::ostream &RANGE_LENS(); + std::ostream &TRANS_TARGS_WI(); + std::ostream &ACTIONS_ARRAY(); + + void taKeyOffsets(); + void taSingleLens(); + void taRangeLens(); + void taIndexOffsets(); + void taIndices(); + void taTransCondSpacesWi(); + void taTransOffsetsWi(); + void taTransLengthsWi(); + void taTransCondSpaces(); + void taTransOffsets(); + void taTransLengths(); + void taCondTargs(); + void taCondActions(); + void taToStateActions(); + void taFromStateActions(); + void taEofTrans(); + void taEofConds(); + void taEofActions(); + void taKeys(); + void taActions(); + void taCondKeys(); + void taNfaTargs(); + void taNfaOffsets(); + void taNfaPushActions(); + void taNfaPopTrans(); + + void setKeyType(); + + void setTableState( TableArray::State ); + + virtual void writeData(); + virtual void tableDataPass(); + virtual void genAnalysis(); +}; + +#endif diff --git a/libfsm/binbreak.cc b/libfsm/binbreak.cc new file mode 100644 index 00000000..18b71542 --- /dev/null +++ b/libfsm/binbreak.cc @@ -0,0 +1,132 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "binbreak.h" + +void BinBreak::LOCATE_TRANS() +{ + out << + " " << keys << " = " << OFFSET( ARR_REF( transKeys ), ARR_REF( keyOffsets ) + "[" + vCS() + "]" ) << ";\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexOffsets ) << "[" << vCS() << "];\n" + "\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( singleLens ) << "[" << vCS() << "];\n" + " " << have << " = 0;\n" + " if ( " << klen << " > 0 ) {\n" + " " << INDEX( ALPH_TYPE(), "_lower" ) << " = " << keys << ";\n" + " " << INDEX( ALPH_TYPE(), "_upper" ) << " = " << keys << " + " << klen << " - 1;\n" + " " << INDEX( ALPH_TYPE(), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << keys << " += " << klen << ";\n" + " " << trans << " += " << CAST( UINT() ) << "" << klen << ";\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << GET_KEY() << " < " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << GET_KEY() << " > " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << have << " = 1;\n" + " " << trans << " += " << CAST( UINT() ) << "(_mid - " << keys << ");\n" + " break;\n" + " }\n" + " }\n" + " }\n" + "\n" + " " << klen << " = " << CAST("int") << ARR_REF( rangeLens ) << "[" << vCS() 
<< "];\n" + " if ( " << have << " == 0 && " << klen << " > 0 ) {\n" + " " << INDEX( ALPH_TYPE(), "_lower" ) << " = " << keys << ";\n" + " " << INDEX( ALPH_TYPE(), "_upper" ) << " = " << keys << " + (" << klen << "<<1) - 2;\n" + " " << INDEX( ALPH_TYPE(), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << trans << " += " << CAST( UINT() ) << "" << klen << ";\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n" + " if ( " << GET_KEY() << " < " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _upper = _mid - 2;\n" + " else if ( " << GET_KEY() << " > " << DEREF( ARR_REF( transKeys ), "_mid + 1" ) << " )\n" + " _lower = _mid + 2;\n" + " else {\n" + " " << trans << " += " << CAST( UINT() ) << "((_mid - " << keys << ")>>1);\n" + " break;\n" + " }\n" + " }\n" + " }\n" + "\n"; +} + +void BinBreak::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + std::stringstream success, error; + + out << + " " << ckeys << " = " << OFFSET( ARR_REF( condKeys ), ARR_REF( transOffsets ) + "[" + trans.ref() + "]" ) << ";\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( transLengths ) << "[" << trans << "];\n" + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + if ( red->condSpaceList.length() > 0 ) + COND_EXEC( ARR_REF( transCondSpaces ) + "[" + trans.ref() + "]" ); + + success << + cond << " += " << CAST( UINT() ) << "(_mid - " << ckeys << ");\n"; + + error << + cond << " = " << errCondOffset << ";\n"; + + out << + " {\n" + " " << INDEX( ARR_TYPE( condKeys ), "_lower" ) << " = " << ckeys << ";\n" + " " << INDEX( ARR_TYPE( condKeys ), "_upper" ) << " = " << ckeys << " + " << klen << " - 1;\n" + " " << INDEX( ARR_TYPE( condKeys ), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << error.str() << "\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + 
((_upper-_lower) >> 1);\n" + " if ( " << cpc << " < " << CAST("int") << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << cpc << " > " << CAST( "int" ) << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << success.str() << "\n" + " break;\n" + " }\n" + " }\n" + " }\n" + ; + } + + out << EMIT_LABEL( _match_cond ); +} + diff --git a/libfsm/binbreak.h b/libfsm/binbreak.h new file mode 100644 index 00000000..1b48ab24 --- /dev/null +++ b/libfsm/binbreak.h @@ -0,0 +1,71 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAGEL_BINBREAK_H +#define RAGEL_BINBREAK_H + +#include "binary.h" +#include "actloop.h" +#include "actexp.h" + +struct BinBreak +: + public Binary, public TabBreak +{ + BinBreak( const CodeGenArgs &args, Binary::Type type ) + : + Tables( args ), + Binary( args, type ), + TabBreak( args ) + {} + + void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class BinBreakLoop + : public BinBreak, public ActLoop +{ +public: + BinBreakLoop( const CodeGenArgs &args ) + : + Tables( args ), + BinBreak( args, Loop ), + ActLoop( args ) + {} +}; + + +class BinBreakExp + : public BinBreak, public ActExp +{ +public: + BinBreakExp( const CodeGenArgs &args ) + : + Tables( args ), + BinBreak( args, Exp ), + ActExp( args ) + {} +}; + + +#endif diff --git a/libfsm/bingoto.cc b/libfsm/bingoto.cc new file mode 100644 index 00000000..1f4a818d --- /dev/null +++ b/libfsm/bingoto.cc @@ -0,0 +1,131 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "bingoto.h" + +void BinGoto::LOCATE_TRANS() +{ + out << + " " << keys << " = " << OFFSET( ARR_REF( transKeys ), ARR_REF( keyOffsets ) + "[" + vCS() + "]" ) << ";\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexOffsets ) << "[" << vCS() << "];\n" + "\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( singleLens ) << "[" << vCS() << "];\n" + " if ( " << klen << " > 0 ) {\n" + " " << INDEX( ALPH_TYPE(), "_lower" ) << " = " << keys << ";\n" + " " << INDEX( ALPH_TYPE(), "_upper" ) << " = " << keys << " + " << klen << " - 1;\n" + " " << INDEX( ALPH_TYPE(), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << keys << " += " << klen << ";\n" + " " << trans << " += " << CAST( UINT() ) << "" << klen << ";\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << GET_KEY() << " < " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << GET_KEY() << " > " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << trans << " += " << CAST( UINT() ) << "(_mid - " << keys << ");\n" + " goto " << _match << ";\n" + " }\n" + " }\n" + " }\n" + "\n" + " " << klen << " = " << CAST("int") << ARR_REF( rangeLens ) << "[" << vCS() << "];\n" + " if ( " << klen << " > 0 ) {\n" + " " << INDEX( ALPH_TYPE(), "_lower" ) << " = " << keys << ";\n" + " " << INDEX( ALPH_TYPE(), "_upper" ) << " = " << keys << " + (" << klen << "<<1) - 2;\n" + " " << INDEX( ALPH_TYPE(), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << trans << " += " << CAST( UINT() ) << "" << klen << ";\n" + " break;\n" + " }\n" + 
"\n" + " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n" + " if ( " << GET_KEY() << " < " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _upper = _mid - 2;\n" + " else if ( " << GET_KEY() << " > " << DEREF( ARR_REF( transKeys ), "_mid + 1" ) << " )\n" + " _lower = _mid + 2;\n" + " else {\n" + " " << trans << " += " << CAST( UINT() ) << "((_mid - " << keys << ")>>1);\n" + " break;\n" + " }\n" + " }\n" + " }\n" + "\n"; + + out << EMIT_LABEL( _match ); +} + + +void BinGoto::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + std::stringstream success, error; + + out << + " " << ckeys << " = " << OFFSET( ARR_REF( condKeys ), ARR_REF( transOffsets ) + "[" + trans.ref() + "]" ) << ";\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( transLengths ) << "[" << trans << "];\n" + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + if ( red->condSpaceList.length() > 0 ) + COND_EXEC( ARR_REF( transCondSpaces ) + "[" + trans.ref() + "]" ); + + success << + cond << " += " << CAST( UINT() ) << "(_mid - " << ckeys << ");\n"; + + error << + cond << " = " << errCondOffset << ";\n"; + + out << + " {\n" + " " << INDEX( ARR_TYPE( condKeys ), "_lower" ) << " = " << ckeys << ";\n" + " " << INDEX( ARR_TYPE( condKeys ), "_upper" ) << " = " << ckeys << " + " << klen << " - 1;\n" + " " << INDEX( ARR_TYPE( condKeys ), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << error.str() << "\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << cpc << " < " << CAST("int") << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << cpc << " > " << CAST( "int" ) << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << success.str() << "\n" + " break;\n" + " }\n" + " }\n" + " }\n" + ; + } +} + diff --git a/libfsm/bingoto.h 
b/libfsm/bingoto.h new file mode 100644 index 00000000..18fa8397 --- /dev/null +++ b/libfsm/bingoto.h @@ -0,0 +1,71 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAGEL_BINGOTO_H +#define RAGEL_BINGOTO_H + +#include "binary.h" +#include "actloop.h" +#include "actexp.h" + +struct BinGoto +: + public Binary, public TabGoto +{ + BinGoto( const CodeGenArgs &args, Binary::Type type ) + : + Tables( args ), + Binary( args, type ), + TabGoto( args ) + {} + + void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class BinGotoLoop + : public BinGoto, public ActLoop +{ +public: + BinGotoLoop( const CodeGenArgs &args ) + : + Tables( args ), + BinGoto( args, Loop ), + ActLoop( args ) + {} +}; + + +class BinGotoExp + : public BinGoto, public ActExp +{ +public: + BinGotoExp( const CodeGenArgs &args ) + : + Tables( args ), + BinGoto( args, Exp ), + ActExp( args ) + {} +}; + + +#endif diff --git a/libfsm/binvar.cc b/libfsm/binvar.cc new file mode 100644 index 00000000..addec681 --- /dev/null +++ b/libfsm/binvar.cc @@ -0,0 +1,139 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "binvar.h" +#include "parsedata.h" +#include "inputdata.h" + + +void BinVar::LOCATE_TRANS() +{ + out << + " " << keys << " = " << OFFSET( ARR_REF( transKeys ), ARR_REF( keyOffsets ) + "[" + vCS() + "]" ) << ";\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexOffsets ) << "[" << vCS() << "];\n" + "\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( singleLens ) << "[" << vCS() << "];\n" + " " << have << " = 0;\n" + " if ( " << klen << " > 0 ) {\n" + " " << INDEX( ALPH_TYPE(), "_lower" ) << " = " << keys << ";\n" + " " << INDEX( ALPH_TYPE(), "_upper" ) << " = " << keys << " + " << klen << " - 1;\n" + " " << INDEX( ALPH_TYPE(), "_mid" ) << ";\n" + " _bsc = 1;\n" + " while ( _bsc == 1 ) {\n" + " if ( _upper < _lower ) {\n" + " " << keys << " += " << klen << ";\n" + " " << trans << " += " << CAST( UINT() ) << "" << klen << ";\n" + " _bsc = 0;\n" + " }\n" + " else {\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << GET_KEY() << " < " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << GET_KEY() << " > " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << have << " = 1;\n" + " " << trans << " += " << CAST( UINT() ) << "(_mid - " << keys << ");\n" + " _bsc = 0;\n" + " }\n" + " }\n" + " }\n" + " }\n" + "\n" + " " << klen << " = " << CAST("int") << ARR_REF( rangeLens ) << "[" << vCS() << "];\n" + " if ( " << have << " == 0 && " << klen << " > 0 ) {\n" + " " << INDEX( ALPH_TYPE(), "_lower" ) << " = " << keys << ";\n" + " " << INDEX( ALPH_TYPE(), "_upper" ) << " = " << keys << " + (" << klen << "<<1) - 2;\n" + " " << INDEX( ALPH_TYPE(), "_mid" ) << ";\n" + " _bsc = 1;\n" 
+ " while ( _bsc == 1 ) {\n" + " if ( _upper < _lower ) {\n" + " " << trans << " += " << CAST( UINT() ) << "" << klen << ";\n" + " _bsc = 0;\n" + " }\n" + " else {\n" + " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n" + " if ( " << GET_KEY() << " < " << DEREF( ARR_REF( transKeys ), "_mid" ) << " )\n" + " _upper = _mid - 2;\n" + " else if ( " << GET_KEY() << " > " << DEREF( ARR_REF( transKeys ), "_mid + 1" ) << " )\n" + " _lower = _mid + 2;\n" + " else {\n" + " " << trans << " += " << CAST( UINT() ) << "((_mid - " << keys << ")>>1);\n" + " _bsc = 0;\n" + " }\n" + " }\n" + " }\n" + " }\n" + "\n"; +} + +void BinVar::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + std::stringstream success, error; + + out << + " " << ckeys << " = " << OFFSET( ARR_REF( condKeys ), ARR_REF( transOffsets ) + "[" + trans.ref() + "]" ) << ";\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( transLengths ) << "[" << trans << "];\n" + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + if ( red->condSpaceList.length() > 0 ) + COND_EXEC( ARR_REF( transCondSpaces ) + "[" + trans.ref() + "]" ); + + success << + cond << " += " << CAST( UINT() ) << "(_mid - " << ckeys << ");\n"; + + error << + cond << " = " << errCondOffset << ";\n"; + + out << + " {\n" + " " << INDEX( ARR_TYPE( condKeys ), "_lower" ) << " = " << ckeys << ";\n" + " " << INDEX( ARR_TYPE( condKeys ), "_upper" ) << " = " << ckeys << " + " << klen << " - 1;\n" + " " << INDEX( ARR_TYPE( condKeys ), "_mid" ) << ";\n" + " _bsc = 1;\n" + " while ( _bsc == 1 ) {\n" + " if ( _upper < _lower ) {\n" + " " << error.str() << "\n" + " _bsc = 0;\n" + " }\n" + " else {\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << cpc << " < " << CAST("int") << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << cpc << " > " << CAST( "int" ) << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " 
_lower = _mid + 1;\n" + " else {\n" + " " << success.str() << "\n" + " _bsc = 0;\n" + " }\n" + " }\n" + " }\n" + " }\n" + ; + } +} + diff --git a/libfsm/binvar.h b/libfsm/binvar.h new file mode 100644 index 00000000..cbbcef79 --- /dev/null +++ b/libfsm/binvar.h @@ -0,0 +1,72 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAGEL_BINVAR_H +#define RAGEL_BINVAR_H + +#include "binary.h" +#include "actloop.h" +#include "actexp.h" + +struct BinVar +: + public Binary, public TabVar +{ + BinVar( const CodeGenArgs &args, Binary::Type type ) + : + Tables( args ), + Binary( args, type ), + TabVar( args ) + {} + + void VAR_COND_BIN_SEARCH( Variable &var, TableArray &keys, std::string ok, std::string error ); + + void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class BinVarLoop + : public BinVar, public ActLoop +{ +public: + BinVarLoop( const CodeGenArgs &args ) + : + Tables( args ), + BinVar( args, Loop ), + ActLoop( args ) + {} +}; + +class BinVarExp +: + public BinVar, public ActExp +{ +public: + BinVarExp( const CodeGenArgs &args ) + : + Tables( args ), + BinVar( args, Exp ), + ActExp( args ) + {} +}; + +#endif diff --git a/libfsm/buffer.h b/libfsm/buffer.h new file mode 100644 index 00000000..72bcd5f9 --- /dev/null +++ b/libfsm/buffer.h @@ -0,0 +1,56 @@ +/* + * Copyright 2003-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _BUFFER_H +#define _BUFFER_H + +#define BUFFER_INITIAL_SIZE 4096 + +/* An automatically grown buffer for collecting tokens. Always reuses space; + * never down resizes. */ +struct Buffer +{ + Buffer() + { + data = (char*) malloc( BUFFER_INITIAL_SIZE ); + allocated = BUFFER_INITIAL_SIZE; + length = 0; + } + ~Buffer() { free(data); } + + void append( char p ) + { + if ( length == allocated ) { + allocated *= 2; + data = (char*) realloc( data, allocated ); + } + data[length++] = p; + } + + void clear() { length = 0; } + + char *data; + int allocated; + int length; +}; + +#endif diff --git a/libfsm/codegen.cc b/libfsm/codegen.cc new file mode 100644 index 00000000..ae6ceb06 --- /dev/null +++ b/libfsm/codegen.cc @@ -0,0 +1,1203 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "codegen.h" +#include "ragel.h" +#include "redfsm.h" +#include "gendata.h" +#include "inputdata.h" +#include "parsedata.h" +#include <sstream> +#include <string> +#include <assert.h> +#include <iomanip> + + +using std::ostream; +using std::ostringstream; +using std::string; +using std::endl; +using std::istream; +using std::ifstream; +using std::ostream; +using std::ios; +using std::cin; +using std::endl; + +std::ostream &operator<<( std::ostream &out, Variable &v ) +{ + out << v.name; + v.isReferenced = true; + return out; +} + +std::ostream &operator<<( std::ostream &out, GotoLabel &l ) +{ + out << l.name; + l.isReferenced = true; + return out; +} + +TableArray::TableArray( const char *name, CodeGen &codeGen ) +: + state(InitialState), + name(name), + width(0), + isSigned(true), + isChar(false), + stringTables( codeGen.stringTables ), + iall( codeGen.stringTables ? IALL_STRING : IALL_INTEGRAL ), + values(0), + + /* + * Use zero for min and max because + * we we null terminate every array. + */ + min(0), + max(0), + + codeGen(codeGen), + out(codeGen.out), + ln(0), + isReferenced(false), + started(false) +{ + codeGen.arrayVector.append( this ); +} + +std::string TableArray::ref() +{ + isReferenced = true; + return string("_") + codeGen.DATA_PREFIX() + name; +} + +long long TableArray::size() +{ + return width * values; +} + +void TableArray::startAnalyze() +{ +} + +void TableArray::valueAnalyze( long long v ) +{ + values += 1; + if ( v < min ) + min = v; + if ( v > max ) + max = v; +} + +void TableArray::finishAnalyze() +{ + if ( codeGen.backend == Direct ) { + /* Calculate the type if it is not already set. 
*/ + if ( type.empty() ) { + if ( min >= S8BIT_MIN && max <= S8BIT_MAX ) { + type = "signed char"; + width = sizeof(char); + } + else if ( min >= S16BIT_MIN && max <= S16BIT_MAX ) { + type = "short"; + width = sizeof(short); + } + else if ( min >= S32BIT_MIN && max <= S32BIT_MAX ) { + type = "int"; + width = sizeof(int); + } + /* The lower bound is S64BIT_MIN, mirroring the narrower range checks above. */ + else if ( min >= S64BIT_MIN && max <= S64BIT_MAX ) { + type = "long"; + width = sizeof(long); + } + else { + type = "long long"; + width = sizeof(long long); + } + } + } + else { + /* Calculate the type if it is not already set. */ + if ( type.empty() ) { + if ( min >= S8BIT_MIN && max <= S8BIT_MAX ) { + type = "s8"; + width = sizeof(char); + } + else if ( min >= S16BIT_MIN && max <= S16BIT_MAX ) { + type = "s16"; + width = sizeof(short); + } + else if ( min >= S32BIT_MIN && max <= S32BIT_MAX ) { + type = "s32"; + width = sizeof(int); + } + /* The lower bound is S64BIT_MIN, mirroring the narrower range checks above. */ + else if ( min >= S64BIT_MIN && max <= S64BIT_MAX ) { + type = "s64"; + width = sizeof(long); + } + else { + type = "s128"; + width = sizeof(long long); + } + } + } +} + +void TableArray::startGenerate() +{ + if ( codeGen.backend == Direct ) { + if ( stringTables ) { + out << "static const char S_" << codeGen.DATA_PREFIX() << name << + "[] __attribute__((aligned (16))) = \n\t\""; + } + else { + out << "static const " << type << " " << + "_" << codeGen.DATA_PREFIX() << name << + "[] = {\n\t"; + } + } + else { + out << "array " << type << " " << + "_" << codeGen.DATA_PREFIX() << name << + "( " << min << ", " << max << " ) = { "; + } +} + +void TableArray::stringGenerate( long long value ) +{ + char c; + short h; + int i; +#if SIZEOF_INT != SIZEOF_LONG + long l; +#endif + unsigned char *p = 0; + int n = 0; + switch ( width ) { + case sizeof( char ): + c = value; + p = (unsigned char *)&c; + n = sizeof(char); + break; + case sizeof( short ): + h = value; + p = (unsigned char *)&h; + n = sizeof(short); + break; + case sizeof( int ): + i = value; + p = (unsigned char *)&i; + n = sizeof(int); + break; +#if 
SIZEOF_INT != SIZEOF_LONG + case sizeof( long ): + l = value; + p = (unsigned char *)&l; + n = sizeof(long); + break; +#endif + } + + std::ios_base::fmtflags prevFlags = out.flags( std::ios::hex ); + int prevFill = out.fill( '0' ); + + while ( n-- > 0 ) { + out << '\\'; + out << 'x'; + out << std::setw(2) << (unsigned int) *p++; + } + + out.flags( prevFlags ); + out.fill( prevFill ); +} + +void TableArray::valueGenerate( long long v ) +{ + if ( codeGen.backend == Direct ) { + if ( stringTables ) { + stringGenerate( v ); + + if ( ++ln % iall == 0 ) { + out << "\"\n\t\""; + ln = 0; + } + } + else { + if ( isChar ) + out << "c(" << v << ")"; + else if ( !isSigned ) + out << v << "u"; + else + out << v; + + if ( ( ++ln % iall ) == 0 ) { + out << ",\n\t"; + ln = 0; + } + else { + out << ", "; + } + } + } + else { + if ( isChar ) + out << "c(" << v << ")"; + else if ( !isSigned ) + out << "u(" << v << ")"; + else + out << v; + out << ", "; + } +} + +void TableArray::finishGenerate() +{ + if ( codeGen.backend == Direct ) { + if ( stringTables ) { + out << "\";\nconst " << type << " *_" << codeGen.DATA_PREFIX() << name << + " = (const " << type << "*) S_" << codeGen.DATA_PREFIX() << name << ";\n\n"; + + } + else { + if ( isChar ) + out << "c(0)\n};\n\n"; + else if ( !isSigned ) + out << "0u\n};\n\n"; + else + out << "0\n};\n\n"; + } + } + else { + if ( isChar ) + out << "c(0) };\n\n"; + else if ( !isSigned ) + out << "u(0) };\n\n"; + else + out << "0 };\n\n"; + } + + if ( codeGen.red->id->printStatistics ) { + codeGen.red->id->stats() << name << "\t" << values << "\t" << + size() << "\t" << endl; + } + + codeGen.tableData += size(); +} + +void TableArray::start() +{ + assert( !started ); + started = true; + switch ( state ) { + case InitialState: + break; + case AnalyzePass: + startAnalyze(); + break; + case GeneratePass: + if ( isReferenced ) + startGenerate(); + break; + } +} + +void TableArray::value( long long v ) +{ + assert( started ); + switch ( state ) { + case 
InitialState: + break; + case AnalyzePass: + valueAnalyze( v ); + break; + case GeneratePass: + if ( isReferenced ) + valueGenerate( v ); + break; + } +} + +void TableArray::finish() +{ + assert( started ); + started = false; + switch ( state ) { + case InitialState: + break; + case AnalyzePass: + finishAnalyze(); + break; + case GeneratePass: + if ( isReferenced ) + finishGenerate(); + break; + } +} + +/* Init code gen with in parameters. */ +CodeGen::CodeGen( const CodeGenArgs &args ) +: + CodeGenData( args ), + cpc( "_cpc" ), + pop_test( "_pop_test" ), + new_recs( "new_recs" ), + alt( "_alt" ), + tableData( 0 ), + backend( args.id->hostLang->backend ), + stringTables( args.id->stringTables ), + + nfaTargs( "nfa_targs", *this ), + nfaOffsets( "nfa_offsets", *this ), + nfaPushActions( "nfa_push_actions", *this ), + nfaPopTrans( "nfa_pop_trans", *this ) +{ +} + +void CodeGen::statsSummary() +{ + if ( red->id->printStatistics ) + red->id->stats() << "table-data\t\t" << tableData << endl << endl; +} + + +string CodeGen::CAST( string type ) +{ + if ( backend == Direct ) + return "(" + type + ")"; + else + return "cast(" + type + ")"; +} + +/* Write out the fsm name. */ +string CodeGen::FSM_NAME() +{ + return fsmName; +} + +/* Emit the offset of the start state as a decimal integer. 
*/ +string CodeGen::START_STATE_ID() +{ + ostringstream ret; + ret << redFsm->startState->id; + return ret.str(); +}; + + +string CodeGen::ACCESS() +{ + ostringstream ret; + if ( red->accessExpr != 0 ) { + ret << OPEN_HOST_PLAIN(); + INLINE_LIST( ret, red->accessExpr, 0, false, false ); + ret << CLOSE_HOST_PLAIN(); + ret << ACCESS_OPER(); + } + return ret.str(); +} + + +string CodeGen::P() +{ + ostringstream ret; + if ( red->pExpr == 0 ) + ret << "p"; + else { + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, red->pExpr, 0, false, false ); + ret << CLOSE_HOST_EXPR(); + } + return ret.str(); +} + +string CodeGen::PE() +{ + ostringstream ret; + if ( red->peExpr == 0 ) + ret << "pe"; + else { + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, red->peExpr, 0, false, false ); + ret << CLOSE_HOST_EXPR(); + } + return ret.str(); +} + +string CodeGen::vEOF() +{ + ostringstream ret; + if ( red->eofExpr == 0 ) + ret << "eof"; + else { + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, red->eofExpr, 0, false, false ); + ret << CLOSE_HOST_EXPR(); + } + return ret.str(); +} + +string CodeGen::vCS() +{ + ostringstream ret; + if ( red->csExpr == 0 ) + ret << ACCESS() << "cs"; + else { + /* Emit the user supplied method of retrieving the key. 
*/ + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, red->csExpr, 0, false, false ); + ret << CLOSE_HOST_EXPR(); + } + return ret.str(); +} + +string CodeGen::TOP() +{ + ostringstream ret; + if ( red->topExpr == 0 ) + ret << ACCESS() + "top"; + else { + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, red->topExpr, 0, false, false ); + ret << CLOSE_HOST_EXPR(); + } + return ret.str(); +} + +string CodeGen::STACK() +{ + ostringstream ret; + if ( red->stackExpr == 0 ) + ret << ACCESS() + "stack"; + else { + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, red->stackExpr, 0, false, false ); + ret << CLOSE_HOST_EXPR(); + } + return ret.str(); +} + +string CodeGen::ACT() +{ + ostringstream ret; + if ( red->actExpr == 0 ) + ret << ACCESS() + "act"; + else { + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, red->actExpr, 0, false, false ); + ret << CLOSE_HOST_EXPR(); + } + return ret.str(); +} + +string CodeGen::TOKSTART() +{ + ostringstream ret; + if ( red->tokstartExpr == 0 ) + ret << ACCESS() + "ts"; + else { + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, red->tokstartExpr, 0, false, false ); + ret << CLOSE_HOST_EXPR(); + } + return ret.str(); +} + +string CodeGen::TOKEND() +{ + ostringstream ret; + if ( red->tokendExpr == 0 ) + ret << ACCESS() + "te"; + else { + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, red->tokendExpr, 0, false, false ); + ret << CLOSE_HOST_EXPR(); + } + return ret.str(); +} + +string CodeGen::GET_KEY() +{ + ostringstream ret; + if ( red->getKeyExpr != 0 ) { + /* Emit the user supplied method of retrieving the key. */ + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, red->getKeyExpr, 0, false, false ); + ret << CLOSE_HOST_EXPR(); + } + else { + /* Expression for retrieving the key, use simple dereference. */ + ret << "( " << DEREF( "data", P() ) << ")"; + } + return ret.str(); +} + +/* Write out a key from the fsm code gen. Depends on wether or not the key is + * signed. 
*/ +string CodeGen::KEY( Key key ) +{ + if ( backend == Direct ) { + ostringstream ret; + if ( alphType->isChar ) + ret << "c(" << (unsigned long) key.getVal() << ")"; + else if ( keyOps->isSigned || !keyOps->explicitUnsigned ) + ret << key.getVal(); + else + ret << (unsigned long) key.getVal() << "u"; + return ret.str(); + } + else { + ostringstream ret; + if ( alphType->isChar ) + ret << "c(" << (unsigned long) key.getVal() << ")"; + else if ( keyOps->isSigned || !keyOps->explicitUnsigned ) + ret << key.getVal(); + else + ret << "u(" << (unsigned long) key.getVal() << ")"; + return ret.str(); + } +} + +bool CodeGen::isAlphTypeSigned() +{ + return keyOps->isSigned; +} + +void CodeGen::DECLARE( std::string type, Variable &var, std::string init ) +{ + if ( var.isReferenced ) + out << type << " " << var.name << init << ";\n"; +} + +void CodeGen::EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ) +{ + /* The parser gives fexec two children. The double brackets are for D + * code. If the inline list is a single word it will get interpreted as a + * C-style cast by the D compiler. */ + ret << OPEN_GEN_BLOCK() << P() << " = (("; + INLINE_LIST( ret, item->children, targState, inFinish, false ); + ret << "))-1;" << CLOSE_GEN_BLOCK() << "\n"; +} + +void CodeGen::LM_SWITCH( ostream &ret, GenInlineItem *item, + int targState, int inFinish, bool csForced ) +{ + ret << + OPEN_GEN_BLOCK() << "switch( " << ACT() << " ) {\n"; + + for ( GenInlineList::Iter lma = *item->children; lma.lte(); lma++ ) { + /* Write the case label, the action and the case break. */ + if ( lma->lmId < 0 ) + ret << " " << DEFAULT() << " {\n"; + else + ret << " " << CASE( STR(lma->lmId) ) << " {\n"; + + /* Write the block and close it off. 
*/ + INLINE_LIST( ret, lma->children, targState, inFinish, csForced ); + + ret << CEND() << "\n}\n"; + } + + ret << + " }" << CLOSE_GEN_BLOCK() << "\n" + "\t"; +} + +void CodeGen::LM_EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ) +{ + /* The parser gives fexec two children. The double brackets are for D + * code. If the inline list is a single word it will get interpreted as a + * C-style cast by the D compiler. This should be in the D code generator. */ + ret << P() << " = (("; + INLINE_LIST( ret, item->children, targState, inFinish, false ); + ret << "))-1;\n"; +} + +void CodeGen::SET_ACT( ostream &ret, GenInlineItem *item ) +{ + ret << ACT() << " = " << item->lmId << ";"; +} + +void CodeGen::SET_TOKEND( ostream &ret, GenInlineItem *item ) +{ + /* The tokend action sets tokend. The optional offset and the closing + * semicolon are written to ret as well, so the whole statement lands on + * the stream the caller supplied. */ + ret << TOKEND() << " = " << P(); + if ( item->offset != 0 ) + ret << "+" << item->offset; + ret << ";"; +} + +void CodeGen::GET_TOKEND( ostream &ret, GenInlineItem *item ) +{ + ret << TOKEND(); +} + +void CodeGen::INIT_TOKSTART( ostream &ret, GenInlineItem *item ) +{ + ret << TOKSTART() << " = " << NIL() << ";"; +} + +void CodeGen::INIT_ACT( ostream &ret, GenInlineItem *item ) +{ + ret << ACT() << " = 0;"; +} + +void CodeGen::SET_TOKSTART( ostream &ret, GenInlineItem *item ) +{ + ret << TOKSTART() << " = " << P() << ";"; +} + +void CodeGen::HOST_STMT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + ret << OPEN_HOST_BLOCK( item->loc.fileName, item->loc.line ); + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + ret << CLOSE_HOST_BLOCK(); + } +} + +#if 0 +void CodeGen::LM_CASE( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. 
*/ + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + } +} +#endif + +void CodeGen::HOST_EXPR( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + ret << CLOSE_HOST_EXPR(); + } +} + +void CodeGen::HOST_TEXT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + ret << OPEN_HOST_PLAIN(); + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + ret << CLOSE_HOST_PLAIN(); + } +} + +void CodeGen::GEN_STMT( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + ret << OPEN_GEN_BLOCK(); + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + ret << CLOSE_GEN_BLOCK(); + } +} + +void CodeGen::GEN_EXPR( ostream &ret, GenInlineItem *item, + int targState, bool inFinish, bool csForced ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + ret << OPEN_GEN_EXPR(); + INLINE_LIST( ret, item->children, targState, inFinish, csForced ); + ret << CLOSE_GEN_EXPR(); + } +} + +void CodeGen::INLINE_EXPR( ostream &ret, GenInlineList *inlineList ) +{ + ret << OPEN_HOST_EXPR(); + INLINE_LIST( ret, inlineList, 0, false, false ); + ret << CLOSE_HOST_EXPR(); +} + +void CodeGen::INLINE_BLOCK( ostream &ret, GenInlineExpr *inlineExpr ) +{ + /* Wrap the inline list in a host block, writing to the stream supplied by + * the caller, the same way INLINE_EXPR does. */ + ret << OPEN_HOST_BLOCK( inlineExpr ); + INLINE_LIST( ret, inlineExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); +} + +void CodeGen::INLINE_PLAIN( ostream &ret, GenInlineExpr *inlineExpr ) +{ + +} + +/* Write out an inline tree structure. 
Walks the list and possibly calls out + * to virtual functions than handle language specific items in the tree. */ +void CodeGen::INLINE_LIST( ostream &ret, GenInlineList *inlineList, + int targState, bool inFinish, bool csForced ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case GenInlineItem::Text: + if ( backend == Direct ) + ret << item->data; + else + translatedHostData( ret, item->data ); + break; + case GenInlineItem::Goto: + GOTO( ret, item->targState->id, inFinish ); + break; + case GenInlineItem::Call: + CALL( ret, item->targState->id, targState, inFinish ); + break; + case GenInlineItem::Ncall: + NCALL( ret, item->targState->id, targState, inFinish ); + break; + case GenInlineItem::Next: + NEXT( ret, item->targState->id, inFinish ); + break; + case GenInlineItem::Ret: + RET( ret, inFinish ); + break; + case GenInlineItem::Nret: + NRET( ret, inFinish ); + break; + case GenInlineItem::PChar: + ret << P(); + break; + case GenInlineItem::Char: + ret << OPEN_GEN_EXPR() << GET_KEY() << CLOSE_GEN_EXPR(); + break; + case GenInlineItem::Hold: + ret << OPEN_GEN_BLOCK() << P() << " = " << P() << " - 1; " << CLOSE_GEN_BLOCK(); + break; + case GenInlineItem::LmHold: + ret << P() << " = " << P() << " - 1;"; + break; + case GenInlineItem::NfaClear: + ret << "nfa_len = 0; "; + break; + case GenInlineItem::Exec: + EXEC( ret, item, targState, inFinish ); + break; + case GenInlineItem::Curs: + CURS( ret, inFinish ); + break; + case GenInlineItem::Targs: + TARGS( ret, inFinish, targState ); + break; + case GenInlineItem::Entry: + ret << item->targState->id; + break; + case GenInlineItem::GotoExpr: + GOTO_EXPR( ret, item, inFinish ); + break; + case GenInlineItem::CallExpr: + CALL_EXPR( ret, item, targState, inFinish ); + break; + case GenInlineItem::NcallExpr: + NCALL_EXPR( ret, item, targState, inFinish ); + break; + case GenInlineItem::NextExpr: + NEXT_EXPR( ret, item, inFinish ); + break; + case 
GenInlineItem::LmSwitch: + LM_SWITCH( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::LmExec: + LM_EXEC( ret, item, targState, inFinish ); + break; + case GenInlineItem::LmCase: + /* Not encountered here, in the lm switch. */ + break; + case GenInlineItem::LmSetActId: + SET_ACT( ret, item ); + break; + case GenInlineItem::LmSetTokEnd: + SET_TOKEND( ret, item ); + break; + case GenInlineItem::LmGetTokEnd: + GET_TOKEND( ret, item ); + break; + case GenInlineItem::LmInitTokStart: + INIT_TOKSTART( ret, item ); + break; + case GenInlineItem::LmInitAct: + INIT_ACT( ret, item ); + break; + case GenInlineItem::LmSetTokStart: + SET_TOKSTART( ret, item ); + break; + case GenInlineItem::Break: + BREAK( ret, targState, csForced ); + break; + case GenInlineItem::Nbreak: + NBREAK( ret, targState, csForced ); + break; + case GenInlineItem::HostStmt: + HOST_STMT( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::HostExpr: + HOST_EXPR( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::HostText: + HOST_TEXT( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::GenStmt: + GEN_STMT( ret, item, targState, inFinish, csForced ); + break; + case GenInlineItem::GenExpr: + GEN_EXPR( ret, item, targState, inFinish, csForced ); + break; + /* These should not be encountered. We handle these Nfa wraps at the top level. */ + case GenInlineItem::NfaWrapAction: + case GenInlineItem::NfaWrapConds: + break; + } + } +} + +/* Write out paths in line directives. Escapes any special characters. 
*/ +string CodeGen::LDIR_PATH( char *path ) +{ + ostringstream ret; + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + ret << "\\\\"; + else + ret << *pc; + } + return ret.str(); +} + +void CodeGen::ACTION( ostream &ret, GenAction *action, IlOpts opts ) +{ + ret << '\t'; + ret << OPEN_HOST_BLOCK( action->loc.fileName, action->loc.line ); + INLINE_LIST( ret, action->inlineList, opts.targState, opts.inFinish, opts.csForced ); + ret << CLOSE_HOST_BLOCK(); + ret << "\n"; + genOutputLineDirective( ret ); +} + +void CodeGen::CONDITION( ostream &ret, GenAction *condition ) +{ + ret << OPEN_HOST_EXPR( condition->loc.fileName, condition->loc.line ); + INLINE_LIST( ret, condition->inlineList, 0, false, false ); + ret << CLOSE_HOST_EXPR(); + ret << "\n"; + genOutputLineDirective( ret ); +} + +void CodeGen::NFA_CONDITION( ostream &ret, GenAction *condition, bool last ) +{ + if ( condition->inlineList->length() == 1 && + condition->inlineList->head->type == + GenInlineItem::NfaWrapAction ) + { + GenAction *action = condition->inlineList->head->wrappedAction; + /* Emit the wrapped action on the caller's stream. */ + ACTION( ret, action, IlOpts( 0, false, false ) ); + } + else if ( condition->inlineList->length() == 1 && + condition->inlineList->head->type == + GenInlineItem::NfaWrapConds ) + { + ret << + " " << cpc << " = 0;\n"; + + GenCondSpace *condSpace = condition->inlineList->head->condSpace; + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + ret << + " if ( "; + /* The condition text belongs between the "if (" and ")" written to ret. */ + CONDITION( ret, *csi ); + Size condValOffset = (1 << csi.pos()); + ret << " ) " << cpc << " += " << condValOffset << ";\n"; + } + + const CondKeySet &keys = condition->inlineList->head->condKeySet; + if ( keys.length() > 0 ) { + ret << pop_test << " = "; + for ( CondKeySet::Iter cki = keys; cki.lte(); cki++ ) { + ret << "" << cpc << " == " << *cki; + if ( !cki.last() ) + ret << " || "; + } + ret << ";\n"; + } + else { + ret << pop_test << " = 0;\n"; + } + + if ( !last ) { + ret << + "if ( !" 
<< pop_test << " )\n" + " break;\n"; + } + } + else { + ret << pop_test << " = "; + CONDITION( ret, condition ); + ret << ";\n"; + + if ( !last ) { + ret << + "if ( !" << pop_test << " )\n" + " break;\n"; + } + } +} + +void CodeGen::NFA_POP_TEST_EXEC() +{ + out << + " " << pop_test << " = 1;\n" + " switch ( nfa_bp[nfa_len].popTrans ) {\n"; + + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; + redAct.lte(); redAct++ ) + { + if ( redAct->numNfaPopTestRefs > 0 ) { + /* Write the entry label. */ + out << "\t " << CASE( STR( redAct->actListId+1 ) ) << " {\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) + NFA_CONDITION( out, item->value, item.last() ); + + out << CEND() << "\n}\n"; + } + } + + out << + " }\n" + "\n"; +} + + +string CodeGen::ERROR_STATE() +{ + ostringstream ret; + if ( redFsm->errState != 0 ) + ret << redFsm->errState->id; + else + ret << "-1"; + return ret.str(); +} + +string CodeGen::FIRST_FINAL_STATE() +{ + ostringstream ret; + if ( redFsm->firstFinState != 0 ) + ret << redFsm->firstFinState->id; + else + ret << redFsm->nextStateId; + return ret.str(); +} + +void CodeGen::writeInit() +{ + out << " {\n"; + + if ( !noCS ) + out << "\t" << vCS() << " = " << CAST("int") << START() << ";\n"; + + if ( redFsm->anyNfaStates() ) + out << "\t" << "nfa_len = 0;\n"; + + /* If there are any calls, then the stack top needs initialization. 
*/ + if ( redFsm->anyActionCalls() || redFsm->anyActionNcalls() || + redFsm->anyActionRets() || redFsm->anyActionNrets() ) + { + out << "\t" << TOP() << " = 0;\n"; + } + + if ( red->hasLongestMatch ) { + out << + " " << TOKSTART() << " = " << NIL() << ";\n" + " " << TOKEND() << " = " << NIL() << ";\n"; + + if ( redFsm->usingAct() ) { + out << + " " << ACT() << " = 0;\n"; + } + } + out << " }\n"; +} + +string CodeGen::DATA_PREFIX() +{ + if ( !noPrefix ) + return FSM_NAME() + "_"; + return ""; +} + +/* Emit the alphabet data type. */ +string CodeGen::ALPH_TYPE() +{ + string ret = alphType->data1; + if ( alphType->data2 != 0 ) { + ret += " "; + ret += + alphType->data2; + } + return ret; +} + +void CodeGen::VALUE( string type, string name, string value ) +{ + if ( backend == Direct ) + out << "static const " << type << " " << name << " = " << value << ";\n"; + else + out << "value " << type << " " << name << " = " << value << ";\n"; +} + +string CodeGen::STR( int v ) +{ + ostringstream s; + s << v; + return s.str(); +} + +void CodeGen::STATE_IDS() +{ + if ( redFsm->startState != 0 ) + VALUE( "int", START(), START_STATE_ID() ); + + if ( !noFinal ) + VALUE( "int", FIRST_FINAL(), FIRST_FINAL_STATE() ); + + if ( !noError ) + VALUE( "int", ERROR(), ERROR_STATE() ); + + out << "\n"; + + if ( red->entryPointNames.length() > 0 ) { + for ( EntryNameVect::Iter en = red->entryPointNames; en.lte(); en++ ) { + string name = DATA_PREFIX() + "en_" + *en; + VALUE( "int", name, STR( red->entryPointIds[en.pos()] ) ); + } + out << "\n"; + } +} + +void CodeGen::writeStart() +{ + out << START_STATE_ID(); +} + +void CodeGen::writeFirstFinal() +{ + out << FIRST_FINAL_STATE(); +} + +void CodeGen::writeError() +{ + out << ERROR_STATE(); +} + +void CodeGen::writeExports() +{ + if ( red->exportList.length() > 0 ) { + for ( ExportList::Iter ex = red->exportList; ex.lte(); ex++ ) { + out << EXPORT( ALPH_TYPE(), + DATA_PREFIX() + "ex_" + ex->name, KEY(ex->key) ) << "\n"; + } + out << "\n"; + } +} 
+ +void CodeGen::NFA_PUSH( std::string state ) +{ + if ( redFsm->anyNfaStates() ) { + out << + " if ( " << ARR_REF( nfaOffsets ) << "[" << state << "] != 0 ) {\n" + " " << alt << " = 0; \n" + " " << new_recs << " = " << CAST("int") << ARR_REF( nfaTargs ) << "[" << CAST("int") << + ARR_REF( nfaOffsets ) << "[" << state << "]];\n"; + + if ( red->nfaPrePushExpr != 0 ) { + out << OPEN_HOST_BLOCK( red->nfaPrePushExpr ); + INLINE_LIST( out, red->nfaPrePushExpr->inlineList, 0, false, false ); + out << CLOSE_HOST_BLOCK(); + out << "\n"; + genOutputLineDirective( out ); + } + + out << + " while ( " << alt << " < " << new_recs << " ) { \n"; + + + out << + " nfa_bp[nfa_len].state = " << CAST("int") << ARR_REF( nfaTargs ) << "[" << CAST("int") << + ARR_REF( nfaOffsets ) << "[" << state << "] + 1 + " << alt << "];\n" + " nfa_bp[nfa_len].p = " << P() << ";\n"; + + if ( redFsm->bAnyNfaPops ) { + out << + " nfa_bp[nfa_len].popTrans = " << ARR_REF( nfaPopTrans ) << "[" << CAST("long") << + ARR_REF( nfaOffsets ) << "[" << state << "] + 1 + " << alt << "];\n" + "\n" + ; + } + + if ( redFsm->bAnyNfaPushes ) { + out << + " switch ( " << ARR_REF( nfaPushActions ) << "[" << CAST("int") << + ARR_REF( nfaOffsets ) << "[" << state << "] + 1 + " << alt << "] ) {\n"; + + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; + redAct.lte(); redAct++ ) + { + if ( redAct->numNfaPushRefs > 0 ) { + /* Write the entry label. */ + out << "\t " << CASE( STR( redAct->actListId+1 ) ) << " {\n"; + + /* Write each action in the list of action items. 
*/ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, IlOpts( 0, false, false ) ); + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + out << + " }\n"; + } + + + out << + " nfa_len += 1;\n" + " " << alt << " += 1;\n" + " }\n" + " }\n" + ; + } +} + +void CodeGen::NFA_POST_POP() +{ + if ( red->nfaPostPopExpr != 0 ) { + out << OPEN_HOST_BLOCK( red->nfaPostPopExpr ); + INLINE_LIST( out, red->nfaPostPopExpr->inlineList, 0, false, false ); + out << CLOSE_HOST_BLOCK(); + } +} diff --git a/libfsm/codegen.h b/libfsm/codegen.h new file mode 100644 index 00000000..904c839f --- /dev/null +++ b/libfsm/codegen.h @@ -0,0 +1,459 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _C_CODEGEN_H +#define _C_CODEGEN_H + +#include <iostream> +#include <string> +#include <stdio.h> +#include "common.h" +#include "gendata.h" +#include "vector.h" + +using std::string; +using std::ostream; + +/* Integer array line length. */ +//#define IALL 8 + +#define IALL_INTEGRAL 8 +#define IALL_STRING 128 + + +/* Forwards. */ +struct RedFsmAp; +struct RedStateAp; +struct CodeGenData; +struct GenAction; +struct NameInst; +struct GenInlineItem; +struct GenInlineList; +struct RedAction; +struct LongestMatch; +struct LongestMatchPart; + +string itoa( int i ); + +struct Variable +{ + Variable( const char *name ) : name(name), isReferenced(false) {} + + const std::string ref() { isReferenced = true; return name; } + + const char *name; + bool isReferenced; +}; + +struct GotoLabel +{ + GotoLabel( const char *name ) : name(name), isReferenced(false) {} + + const std::string ref() { isReferenced = true; return name; } + + const char *name; + bool isReferenced; +}; + +std::ostream &operator<<( std::ostream &out, GotoLabel &l ); +std::ostream &operator<<( std::ostream &out, Variable &v ); + +struct TableArray; +typedef Vector<TableArray*> ArrayVector; +class CodeGen; + +struct TableArray +{ + enum State { + InitialState = 1, + AnalyzePass, + GeneratePass + }; + + TableArray( const char *name, CodeGen &codeGen ); + + void start(); + void startAnalyze(); + void startGenerate(); + + void setType( std::string type, int width, bool isChar ) + { + this->type = type; + this->width = width; + this->isChar = isChar; + } + + std::string ref(); + + void value( long long v ); + + void valueAnalyze( long long v ); + void valueGenerate( long long v ); + void stringGenerate( long long value ); + + void finish(); + void finishAnalyze(); + void finishGenerate(); + + void setState( TableArray::State state ) + { this->state = state; } + + long long size(); + + State state; + const char *name; + std::string type; + int width; + bool isSigned; + bool isChar; + bool 
stringTables; + int iall; + long long values; + long long min; + long long max; + CodeGen &codeGen; + std::ostream &out; + int ln; + bool isReferenced; + bool started; +}; + +struct IlOpts +{ + IlOpts( int targState, bool inFinish, bool csForced ) + : targState(targState), inFinish(inFinish), csForced(csForced) {} + + int targState; + bool inFinish; + bool csForced; +}; + + +/* + * class CodeGen + */ +class CodeGen : public CodeGenData +{ +public: + CodeGen( const CodeGenArgs &args ); + + virtual ~CodeGen() {} + + virtual void writeInit(); + virtual void writeStart(); + virtual void writeFirstFinal(); + virtual void writeError(); + virtual void statsSummary(); + +protected: + friend TableArray; + typedef Vector<TableArray*> ArrayVector; + ArrayVector arrayVector; + + Variable cpc; + Variable pop_test; + Variable new_recs; + Variable alt; + + string FSM_NAME(); + string START_STATE_ID(); + void taActions(); + string KEY( Key key ); + string LDIR_PATH( char *path ); + + void ACTION( ostream &ret, GenAction *action, IlOpts opts ); + void NFA_CONDITION( ostream &ret, GenAction *condition, bool last ); + void NFA_POP_TEST_EXEC(); + void CONDITION( ostream &ret, GenAction *condition ); + string ALPH_TYPE(); + + bool isAlphTypeSigned(); + long long tableData; + RagelBackend backend; + bool stringTables; + BackendFeature backendFeature; + + TableArray nfaTargs; + TableArray nfaOffsets; + TableArray nfaPushActions; + TableArray nfaPopTrans; + + virtual string GET_KEY(); + + string P(); + string PE(); + string vEOF(); + + string ACCESS(); + string vCS(); + string STACK(); + string TOP(); + string TOKSTART(); + string TOKEND(); + string ACT(); + + string DATA_PREFIX(); + string START() { return DATA_PREFIX() + "start"; } + string ERROR() { return DATA_PREFIX() + "error"; } + string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; } + + /* Declare a variable only if referenced. 
*/ + void DECLARE( std::string type, Variable &var, std::string init = "" ); + + string CAST( string type ); + + string ARR_TYPE( const TableArray &ta ) + { return ta.type; } + + string ARR_REF( TableArray &ta ) + { return ta.ref(); } + + void INLINE_EXPR( ostream &ret, GenInlineList *inlineList ); + void INLINE_BLOCK( ostream &ret, GenInlineExpr *inlineExpr ); + void INLINE_PLAIN( ostream &ret, GenInlineExpr *inlineExpr ); + + void INLINE_LIST( ostream &ret, GenInlineList *inlineList, + int targState, bool inFinish, bool csForced ); + virtual void GOTO( ostream &ret, int gotoDest, bool inFinish ) = 0; + virtual void CALL( ostream &ret, int callDest, int targState, bool inFinish ) = 0; + virtual void NCALL( ostream &ret, int callDest, int targState, bool inFinish ) = 0; + virtual void NEXT( ostream &ret, int nextDest, bool inFinish ) = 0; + virtual void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) = 0; + virtual void NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) = 0; + virtual void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, + int targState, bool inFinish ) = 0; + virtual void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, + int targState, bool inFinish ) = 0; + virtual void RET( ostream &ret, bool inFinish ) = 0; + virtual void NRET( ostream &ret, bool inFinish ) = 0; + virtual void BREAK( ostream &ret, int targState, bool csForced ) = 0; + virtual void NBREAK( ostream &ret, int targState, bool csForced ) = 0; + virtual void CURS( ostream &ret, bool inFinish ) = 0; + virtual void TARGS( ostream &ret, bool inFinish, int targState ) = 0; + void EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ); + void LM_SWITCH( ostream &ret, GenInlineItem *item, int targState, + int inFinish, bool csForced ); + void LM_EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ); + void SET_ACT( ostream &ret, GenInlineItem *item ); + void INIT_TOKSTART( ostream &ret, GenInlineItem *item ); + void INIT_ACT( 
ostream &ret, GenInlineItem *item ); + void SET_TOKSTART( ostream &ret, GenInlineItem *item ); + void SET_TOKEND( ostream &ret, GenInlineItem *item ); + void GET_TOKEND( ostream &ret, GenInlineItem *item ); + + void HOST_STMT( ostream &ret, GenInlineItem *item, int targState, bool inFinish, bool csForced ); + void HOST_EXPR( ostream &ret, GenInlineItem *item, int targState, bool inFinish, bool csForced ); + void HOST_TEXT( ostream &ret, GenInlineItem *item, int targState, bool inFinish, bool csForced ); + void GEN_STMT( ostream &ret, GenInlineItem *item, int targState, bool inFinish, bool csForced ); + void GEN_EXPR( ostream &ret, GenInlineItem *item, int targState, bool inFinish, bool csForced ); + + void STATE_IDS(); + + string ERROR_STATE(); + string FIRST_FINAL_STATE(); + + string STR( int v ); + + void VALUE( string type, string name, string value ); + + string ACCESS_OPER() + { return backend == Direct ? "" : " -> "; } + + string OPEN_HOST_EXPR() + { return backend == Direct ? "(" : "host( \"-\", 1 ) ={"; } + + string OPEN_HOST_EXPR( string fileName, int line ) + { + return backend == Direct ? "(" : "host( \"" + fileName + "\", " + STR(line) + " ) ={"; + } + + string CLOSE_HOST_EXPR() + { return backend == Direct ? ")" : "}="; } + + string OPEN_HOST_BLOCK( string fileName, int line ) + { + if ( backend == Direct ) { + std::stringstream ss; + ss << "{\n" ; + (*genLineDirective)( ss, lineDirectives, line, fileName.c_str() ); + return ss.str(); + } + else { + return "host( \"" + fileName + "\", " + STR(line) + " ) ${"; + } + } + + string OPEN_HOST_BLOCK( GenInlineExpr *inlineExpr ) + { + return OPEN_HOST_BLOCK( inlineExpr->loc.fileName, inlineExpr->loc.line ); + } + + string CLOSE_HOST_BLOCK() + { return backend == Direct ? "}\n" : "}$"; } + + string OPEN_HOST_PLAIN() + { return backend == Direct ? "" : "host( \"-\", 1 ) @{"; } + + string CLOSE_HOST_PLAIN() + { return backend == Direct ? "" : "}@"; } + + string OPEN_GEN_EXPR() + { return backend == Direct ? 
"(" : "={"; } + + string CLOSE_GEN_EXPR() + { return backend == Direct ? ")" : "}="; } + + string OPEN_GEN_BLOCK() + { return backend == Direct ? "{" : "${"; } + + string CLOSE_GEN_BLOCK() + { return backend == Direct ? "}" : "}$"; } + + string OPEN_GEN_PLAIN() + { return backend == Direct ? "" : "@{"; } + + string CLOSE_GEN_PLAIN() + { return backend == Direct ? "" : "}@"; } + + string INT() + { return "int"; } + + string UINT() + { return backend == Direct ? "unsigned int" : "uint"; } + + string INDEX( string type, string name ) + { + if ( backend == Direct ) + return "const " + type + " *" + name; + else + return "index " + type + " " + name; + } + + string INDEX( string type ) + { + if ( backend == Direct ) + return "const " + type + " *"; + else + return "index " + type + " "; + } + + string LABEL( string name ) + { + return name + ": "; + } + + string EMIT_LABEL( GotoLabel label ) + { + if ( label.isReferenced ) + return std::string(label.name) + ": {}\n"; + else + return ""; + } + + string OFFSET( string arr, string off ) + { + if ( backend == Direct ) + return "( " + arr + " + (" + off + "))"; + else + return "offset( " + arr + ", " + off + " )"; + } + + string TRUE() + { + if ( backend == Direct ) + return "1"; + else + return "TRUE"; + } + + string DEREF( string arr, string off ) + { + if ( backend == Direct ) + return "(*( " + off + "))"; + else + return "deref( " + arr + ", " + off + " )"; + } + + string CASE( string val ) + { + if ( backend == Direct ) + return "case " + val + ": "; + else + return "case " + val; + } + + string DEFAULT() + { + if ( backend == Direct ) + return "default:"; + else + return "default"; + } + + string CEND( ) + { + if ( backend == Direct ) + return " break; "; + else + return " "; + } + + string FALLTHROUGH() + { + if ( backend == Direct ) + return " "; + else + return "fallthrough;"; + } + + string NIL() + { + if ( backend == Direct ) + return "0"; + else + return "nil"; + } + + string EXPORT( string type, string name, 
string value ) + { + if ( backend == Direct ) + return "#define " + name + " " + value; + else + return "export " + type + " " + name + " " + value + ";"; + } + + void NFA_POST_POP(); + virtual void NFA_PUSH( std::string ); + virtual void NFA_POP() = 0; + virtual void LOCATE_TRANS() {} + virtual void LOCATE_COND() {} + virtual void EOF_TRANS() {} + + + virtual void COND_EXEC( std::string expr ) {} + virtual void COND_BIN_SEARCH( Variable &var, TableArray &keys, std::string ok, std::string error ) {} + +public: + virtual void writeExports(); +}; + +#endif diff --git a/libfsm/common.cc b/libfsm/common.cc new file mode 100644 index 00000000..6e0f5c0c --- /dev/null +++ b/libfsm/common.cc @@ -0,0 +1,482 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pcheck.h" +#include "common.h" +#include "stdlib.h" +#include <string.h> +#include <assert.h> +#include "ragel.h" + +/* + * C + */ + +const char *defaultOutFnC( const char *inputFileName ) +{ + const char *ext = findFileExtension( inputFileName ); + if ( ext != 0 && strcmp( ext, ".rh" ) == 0 ) + return fileNameFromStem( inputFileName, ".h" ); + else + return fileNameFromStem( inputFileName, ".c" ); +} + +HostType hostTypesC[] = +{ + { "char", 0, "char", true, true, false, CHAR_MIN, CHAR_MAX, 0, 0, sizeof(char) }, + { "signed", "char", "char", true, true, false, CHAR_MIN, CHAR_MAX, 0, 0, sizeof(char) }, + { "unsigned", "char", "uchar", false, true, false, 0, 0, 0, UCHAR_MAX, sizeof(unsigned char) }, + { "short", 0, "short", true, true, false, SHRT_MIN, SHRT_MAX, 0, 0, sizeof(short) }, + { "signed", "short", "short", true, true, false, SHRT_MIN, SHRT_MAX, 0, 0, sizeof(short) }, + { "unsigned", "short", "ushort", false, true, false, 0, 0, 0, USHRT_MAX, sizeof(unsigned short) }, + { "int", 0, "int", true, true, false, INT_MIN, INT_MAX, 0, 0, sizeof(int) }, + { "signed", "int", "int", true, true, false, INT_MIN, INT_MAX, 0, 0, sizeof(int) }, + { "unsigned", "int", "uint", false, true, false, 0, 0, 0, UINT_MAX, sizeof(unsigned int) }, + { "long", 0, "long", true, true, false, LONG_MIN, LONG_MAX, 0, 0, sizeof(long) }, + { "signed", "long", "long", true, true, false, LONG_MIN, LONG_MAX, 0, 0, sizeof(long) }, + { "unsigned", "long", "ulong", false, true, false, 0, 0, 0, ULONG_MAX, sizeof(unsigned long) }, +}; + +const HostLang hostLangC = { + hostTypesC, + 12, + 0, + true, + false, /* loopLabels */ + Direct, + GotoFeature, + &makeCodeGen, + &defaultOutFnC, + &genLineDirectiveC +}; + +/* + * ASM + */ +const char *defaultOutFnAsm( const char *inputFileName ) +{ + return fileNameFromStem( inputFileName, ".s" ); +} + +HostType hostTypesAsm[] = +{ + { "char", 0, "char", true, true, false, CHAR_MIN, CHAR_MAX, 0, 0, sizeof(char) }, + { "unsigned", "char", 
"uchar", false, true, false, 0, 0, 0, UCHAR_MAX, sizeof(unsigned char) }, + { "short", 0, "short", true, true, false, SHRT_MIN, SHRT_MAX, 0, 0, sizeof(short) }, + { "unsigned", "short", "ushort", false, true, false, 0, 0, 0, USHRT_MAX, sizeof(unsigned short) }, + { "int", 0, "int", true, true, false, INT_MIN, INT_MAX, 0, 0, sizeof(int) }, + { "unsigned", "int", "uint", false, true, false, 0, 0, 0, UINT_MAX, sizeof(unsigned int) }, + { "long", 0, "long", true, true, false, LONG_MIN, LONG_MAX, 0, 0, sizeof(long) }, + { "unsigned", "long", "ulong", false, true, false, 0, 0, 0, ULONG_MAX, sizeof(unsigned long) }, +}; + +const HostLang hostLangAsm = { + hostTypesAsm, + 8, + 0, + true, + false, /* loopLabels */ + Direct, + GotoFeature, + &makeCodeGenAsm, + &defaultOutFnC, + &genLineDirectiveAsm +}; + +HostType *findAlphType( const HostLang *hostLang, const char *s1 ) +{ + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 && + hostLang->hostTypes[i].data2 == 0 ) + { + return hostLang->hostTypes + i; + } + } + + return 0; +} + +HostType *findAlphType( const HostLang *hostLang, const char *s1, const char *s2 ) +{ + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 && + hostLang->hostTypes[i].data2 != 0 && + strcmp( s2, hostLang->hostTypes[i].data2 ) == 0 ) + { + return hostLang->hostTypes + i; + } + } + + return 0; +} + +HostType *findAlphTypeInternal( const HostLang *hostLang, const char *s1 ) +{ + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( strcmp( s1, hostLang->hostTypes[i].internalName ) == 0 ) + return hostLang->hostTypes + i; + } + + return 0; +} + +/* Construct a new parameter checker with for paramSpec. */ +ParamCheck::ParamCheck( const char *paramSpec, int argc, const char **argv ) +: + state(noparam), + argOffset(0), + curArg(0), + iCurArg(1), + paramSpec(paramSpec), + argc(argc), + argv(argv) +{ +} + +/* Check a single option. 
Returns the index of the next parameter. Sets p to + * the arg character if valid, 0 otherwise. Sets parg to the parameter arg if + * there is one, NULL otherwise. */ +bool ParamCheck::check() +{ + bool requiresParam; + + if ( iCurArg >= argc ) { /* Off the end of the arg list. */ + state = noparam; + return false; + } + + if ( argOffset != 0 && *argOffset == 0 ) { + /* We are at the end of an arg string. */ + iCurArg += 1; + if ( iCurArg >= argc ) { + state = noparam; + return false; + } + argOffset = 0; + } + + if ( argOffset == 0 ) { + /* Set the current arg. */ + curArg = argv[iCurArg]; + + /* We are at the beginning of an arg string. */ + if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */ + argv[iCurArg][0] != '-' || /* Not a param. */ + argv[iCurArg][1] == 0 ) { /* Only a dash. */ + parameter = 0; + paramArg = 0; + + iCurArg += 1; + state = noparam; + return true; + } + argOffset = argv[iCurArg] + 1; + } + + /* Get the arg char. */ + char argChar = *argOffset; + + /* Loop over all the parms and look for a match. */ + const char *pSpec = paramSpec; + while ( *pSpec != 0 ) { + char pSpecChar = *pSpec; + + /* If there is a ':' following the char then + * it requires a parm. If a parm is required + * then move ahead two in the parmspec. Otherwise + * move ahead one in the parm spec. */ + if ( pSpec[1] == ':' ) { + requiresParam = true; + pSpec += 2; + } + else { + requiresParam = false; + pSpec += 1; + } + + /* Do we have a match. */ + if ( argChar == pSpecChar ) { + if ( requiresParam ) { + if ( argOffset[1] == 0 ) { + /* The param must follow. */ + if ( iCurArg + 1 == argc ) { + /* We are the last arg so there + * cannot be a parameter to it. */ + parameter = argChar; + paramArg = 0; + iCurArg += 1; + argOffset = 0; + state = invalid; + return true; + } + else { + /* the parameter to the arg is the next arg. 
*/ + parameter = pSpecChar; + paramArg = argv[iCurArg + 1]; + iCurArg += 2; + argOffset = 0; + state = match; + return true; + } + } + else { + /* The param for the arg is built in. */ + parameter = pSpecChar; + paramArg = argOffset + 1; + iCurArg += 1; + argOffset = 0; + state = match; + return true; + } + } + else { + /* Good, we matched the parm and no + * arg is required. */ + parameter = pSpecChar; + paramArg = 0; + argOffset += 1; + state = match; + return true; + } + } + } + + /* We did not find a match. Bad Argument. */ + parameter = argChar; + paramArg = 0; + argOffset += 1; + state = invalid; + return true; +} + +std::streamsize output_filter::countAndWrite( const char *s, std::streamsize n ) +{ + for ( int i = 0; i < n; i++ ) { + switch ( s[i] ) { + case '\n': + line += 1; + break; + case '{': + /* If we detec an open block then eliminate the single-indent + * addition, which is to account for single statements. */ + singleIndent = false; + level += 1; + break; + case '}': + level -= 1; + break; + } + } + + return std::filebuf::xsputn( s, n ); +} + +bool openSingleIndent( const char *s, int n ) +{ + if ( n >= 3 && memcmp( s, "if ", 3 ) == 0 ) + return true; + + if ( n >= 8 && memcmp( s, "else if ", 8 ) == 0 ) + return true; + + if ( n >= 5 && memcmp( s, "else\n", 4 ) == 0 ) + return true; + + return false; +} + +/* Counts newlines before sending sync. */ +int output_filter::sync( ) +{ + line += 1; + return std::filebuf::sync(); +} + +/* Counts newlines before sending data out to file. */ +std::streamsize output_filter::xsputn( const char *s, std::streamsize n ) +{ + std::streamsize ret = n; + int l; + +restart: + if ( indent ) { + /* Consume mode Looking for the first non-whitespace. */ + while ( n > 0 && ( *s == ' ' || *s == '\t' ) ) { + s += 1; + n -= 1; + } + + if ( n > 0 ) { + int tabs = level + ( singleIndent ? 1 : 0 ); + + if ( *s == '}' ) { + /* If the next char is de-dent, then reduce the tabs. This is + * not a stream state change. 
The level reduction will be + * computed in write. */ + tabs -= 1; + } + + /* Note that the count and write will eliminate this if it detects + * an open block. */ + if ( openSingleIndent( s, n ) ) + singleIndent = true; + else + singleIndent = false; + + if ( *s != '#' ) { + /* Found some data, print the indentation and turn off indentation + * mode. */ + for ( l = 0; l < tabs; l++ ) + countAndWrite( "\t", 1 ); + } + + + indent = 0; + + goto restart; + } + } + else { + char *nl; + if ( (nl = (char*)memchr( s, '\n', n )) ) { + /* Print up to and including the newline. */ + int wl = nl - s + 1; + countAndWrite( s, wl ); + + /* Go into consume state. If we see more non-indentation chars we + * will generate the appropriate indentation level. */ + s += wl; + n -= wl; + indent = true; + goto restart; + } + else { + /* Indentation off, or no indent trigger (newline). */ + countAndWrite( s, n ); + } + } + + // What to do here? + return ret; +} + +/* Scans a string looking for the file extension. If there is a file + * extension then pointer returned points to inside the string + * passed in. Otherwise returns null. */ +const char *findFileExtension( const char *stemFile ) +{ + const char *ppos = stemFile + strlen(stemFile) - 1; + + /* Scan backwards from the end looking for the first dot. + * If we encounter a '/' before the first dot, then stop the scan. */ + while ( 1 ) { + /* If we found a dot or got to the beginning of the string then + * we are done. */ + if ( ppos == stemFile || *ppos == '.' ) + break; + + /* If we hit a / then there is no extension. Done. */ + if ( *ppos == '/' ) { + ppos = stemFile; + break; + } + ppos--; + } + + /* If we got to the front of the string then bail we + * did not find an extension */ + if ( ppos == stemFile ) + ppos = 0; + + return ppos; +} + +/* Make a file name from a stem. Removes the old filename suffix and + * replaces it with a new one. Returns a newed up string. 
*/ +const char *fileNameFromStem( const char *stemFile, const char *suffix ) +{ + long len = strlen( stemFile ); + assert( len > 0 ); + + /* Get the extension. */ + const char *ppos = findFileExtension( stemFile ); + + /* If an extension was found, then shorten what we think the len is. */ + if ( ppos != 0 ) + len = ppos - stemFile; + + /* Make the return string from the stem and the suffix. */ + char *retVal = new char[ len + strlen( suffix ) + 1 ]; + strncpy( retVal, stemFile, len ); + strcpy( retVal + len, suffix ); + + return retVal; +} + +exit_object endp; + +void operator<<( std::ostream &out, exit_object & ) +{ + out << std::endl; + throw AbortCompile( 1 ); +} + +void genLineDirectiveC( std::ostream &out, bool lineDirectives, int line, const char *fileName ) +{ + if ( !lineDirectives ) + out << "/* "; + + out << "#line " << line << " \""; + for ( const char *pc = fileName; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else if ( *pc == '"' ) + out << "\\\""; + else + out << *pc; + } + out << '"'; + + if ( !lineDirectives ) + out << " */"; + + out << '\n'; +} + +void genLineDirectiveAsm( std::ostream &out, bool lineDirectives, int line, const char *fileName ) +{ + out << "/* #line " << line << " \""; + for ( const char *pc = fileName; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else if ( *pc == '"' ) + out << "\\\""; + else + out << *pc; + } + out << '"'; + out << " */\n"; +} + +void genLineDirectiveTrans( std::ostream &out, bool lineDirectives, int line, const char *fileName ) +{ +} diff --git a/libfsm/common.h b/libfsm/common.h new file mode 100644 index 00000000..142eb735 --- /dev/null +++ b/libfsm/common.h @@ -0,0 +1,504 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * 
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COMMON_H +#define _COMMON_H + +#include <iostream> +#include <fstream> +#include <climits> +#include "dlist.h" + +struct colm_location; + +struct InputData; +struct CodeGenData; +struct HostLang; +struct CodeGenArgs; + +enum RagelBackend +{ + Direct, + Translated +}; + +enum BackendFeature +{ + GotoFeature, + BreakFeature, + VarFeature +}; + +#define S8BIT_MIN -128 +#define S8BIT_MAX 127 + +#define U8BIT_MIN 0 +#define U8BIT_MAX 255 + +#define S16BIT_MIN -32768 +#define S16BIT_MAX 32767 + +#define U16BIT_MIN 0 +#define U16BIT_MAX 65535 + +#define S31BIT_MIN -1073741824l +#define S31BIT_MAX 1073741823l + +#define S32BIT_MIN -2147483648l +#define S32BIT_MAX 2147483647l + +#define U32BIT_MIN 0 +#define U32BIT_MAX 4294967295l + +#define S64BIT_MIN (-9223372036854775807LL - 1LL) +#define S64BIT_MAX 9223372036854775807LL + +#define U64BIT_MIN 0 +#define U64BIT_MAX 18446744073709551615ULL + +struct ParserLoc +{ + const char *fileName; + int line; + int col; +}; + +/* Location in an input file. 
*/ +struct InputLoc +{ + InputLoc( colm_location *pcloc ); + + InputLoc() : fileName(0), line(-1), col(-1) {} + + InputLoc( const ParserLoc loc ) + { + fileName = loc.fileName; + line = loc.line; + col = loc.col; + + if ( fileName == 0 ) + fileName = "-"; + if ( line == 0 ) + line = 1; + } + + InputLoc( const InputLoc &loc ) + { + fileName = loc.fileName; + line = loc.line; + col = loc.col; + + if ( fileName == 0 ) + fileName = "-"; + if ( line == 0 ) + line = 1; + } + + InputLoc( const char *fileName, int line, int col ) + : fileName(fileName), line(line), col(col) {} + + const char *fileName; + int line; + int col; +}; + +extern InputLoc internal; + +typedef unsigned long long Size; + +struct Key +{ +private: + long key; + +public: + friend struct KeyOps; + + Key( ) {} + Key( const Key &key ) : key(key.key) {} + Key( long key ) : key(key) {} + + /* Returns the value used to represent the key. This value must be + * interpreted based on signedness. */ + long getVal() const { return key; }; + + bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); } + bool isLower() const { return ( 'a' <= key && key <= 'z' ); } + bool isPrintable() const + { + return ( 7 <= key && key <= 13 ) || ( 32 <= key && key < 127 ); + } + + Key toUpper() const + { return Key( 'A' + ( key - 'a' ) ); } + Key toLower() const + { return Key( 'a' + ( key - 'A' ) ); } +}; + +struct CondKey +{ +private: + long key; + +public: + friend inline bool operator<( const CondKey key1, const CondKey key2 ); + friend inline bool operator>( const CondKey key1, const CondKey key2 ); + friend inline bool operator==( const CondKey key1, const CondKey key2 ); + friend inline CondKey operator+( const CondKey key1, const CondKey key2 ); + friend inline CondKey operator-( const CondKey key1, const CondKey key2 ); + + friend struct KeyOps; + + CondKey( ) {} + CondKey( const CondKey &key ) : key(key.key) {} + CondKey( long key ) : key(key) {} + + /* Returns the value used to represent the key. 
This value must be + * interpreted based on signedness. */ + long getVal() const { return key; }; + + bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); } + bool isLower() const { return ( 'a' <= key && key <= 'z' ); } + bool isPrintable() const + { + return ( 7 <= key && key <= 13 ) || ( 32 <= key && key < 127 ); + } + + CondKey toUpper() const + { return CondKey( 'A' + ( key - 'a' ) ); } + CondKey toLower() const + { return CondKey( 'a' + ( key - 'A' ) ); } + + /* Decrement. Needed only for ranges. */ + inline void decrement(); + inline void increment(); +}; + +inline CondKey operator+(const CondKey key1, const CondKey key2) +{ + return CondKey( key1.key + key2.key ); +} + +inline CondKey operator-(const CondKey key1, const CondKey key2) +{ + return CondKey( key1.key - key2.key ); +} + +struct HostType +{ + const char *data1; + const char *data2; + const char *internalName; + bool isSigned; + bool isOrd; + bool isChar; + long long sMinVal; + long long sMaxVal; + unsigned long long uMinVal; + unsigned long long uMaxVal; + unsigned int size; +}; + +typedef void (*GenLineDirectiveT)( std::ostream &out, bool nld, int line, const char *file ); +typedef const char *(*DefaultOutFnT)( const char *inputFileName ); +typedef CodeGenData *(*MakeCodeGenT)( const HostLang *hostLang, const CodeGenArgs &args ); + +struct HostLang +{ + HostType *hostTypes; + int numHostTypes; + int defaultAlphType; + bool explicitUnsigned; + bool loopLabels; + + RagelBackend backend; + BackendFeature feature; + + MakeCodeGenT makeCodeGen; + DefaultOutFnT defaultOutFn; + GenLineDirectiveT genLineDirective; +}; + +void genLineDirectiveC( std::ostream &out, bool nld, int line, const char *file ); +void genLineDirectiveAsm( std::ostream &out, bool nld, int line, const char *file ); +void genLineDirectiveTrans( std::ostream &out, bool nld, int line, const char *file ); + +extern const HostLang hostLangC; +extern const HostLang hostLangAsm; + +HostType *findAlphType( const HostLang *hostLang, 
const char *s1 ); +HostType *findAlphType( const HostLang *hostLang, const char *s1, const char *s2 ); +HostType *findAlphTypeInternal( const HostLang *hostLang, const char *s1 ); + +const char *defaultOutFnC( const char *inputFileName ); +extern HostType hostTypesC[]; + +/* An abstraction of the key operators that manages key operations such as + * comparison and increment according the signedness of the key. */ +struct KeyOps +{ + /* Defaults to C "char" type: Signed 8 bit. */ + KeyOps() + : + isSigned(true), + explicitUnsigned(true), + minKey(CHAR_MIN), + maxKey(CHAR_MAX) + {} + + bool isSigned; + bool explicitUnsigned; + Key minKey, maxKey; + + void setAlphType( const HostLang *hostLang, const HostType *alphType ) + { + isSigned = alphType->isSigned; + explicitUnsigned = hostLang->explicitUnsigned; + + if ( isSigned ) { + minKey = (long) alphType->sMinVal; + maxKey = (long) alphType->sMaxVal; + } + else { + minKey = (long) alphType->uMinVal; + maxKey = (long) alphType->uMaxVal; + } + } + + /* Compute the distance between two keys. */ + Size span( Key key1, Key key2 ) + { + return isSigned ? + (unsigned long long)( + (long long)key2.key - + (long long)key1.key + 1) : + (unsigned long long)( + (unsigned long)key2.key) - + (unsigned long long)((unsigned long)key1.key) + 1; + } + + Size alphSize() + { return span( minKey, maxKey ); } + + inline bool lt( const Key key1, const Key key2 ) + { + return this->isSigned ? key1.key < key2.key : + (unsigned long)key1.key < (unsigned long)key2.key; + } + + inline bool le( const Key key1, const Key key2 ) + { + return this->isSigned ? key1.key <= key2.key : + (unsigned long)key1.key <= (unsigned long)key2.key; + } + + inline bool gt( const Key key1, const Key key2 ) + { + return this->isSigned ? key1.key > key2.key : + (unsigned long)key1.key > (unsigned long)key2.key; + } + + inline bool ge( const Key key1, const Key key2 ) + { + return this->isSigned ? 
key1.key >= key2.key : + (unsigned long)key1.key >= (unsigned long)key2.key; + } + + inline bool eq( const Key key1, const Key key2 ) + { + return key1.key == key2.key; + } + + inline bool ne( const Key key1, const Key key2 ) + { + return key1.key != key2.key; + } + + inline Key add(const Key key1, const Key key2) + { + /* FIXME: must be made aware of isSigned. */ + return Key( key1.key + key2.key ); + } + + inline Key sub(const Key key1, const Key key2) + { + /* FIXME: must be made aware of isSigned. */ + return Key( key1.key - key2.key ); + } + + /* Decrement. Needed only for ranges. */ + inline void decrement( Key &key ) + { + key.key = this->isSigned ? key.key - 1 : ((unsigned long)key.key)-1; + } + + /* Increment. Needed only for ranges. */ + inline void increment( Key &key ) + { + key.key = this->isSigned ? key.key+1 : ((unsigned long)key.key)+1; + } + + /* Returns the key casted to a long long. This form of the key does not + * require any signedness interpretation. */ + inline long long getLongLong( const Key &key ) + { + return this->isSigned ? (long long)key.key : (long long)(unsigned long)key.key; + } +}; + +/* CondKey */ + +inline bool operator<( const CondKey key1, const CondKey key2 ) +{ + return key1.key < key2.key; +} + +inline bool operator>( const CondKey key1, const CondKey key2 ) +{ + return key1.key > key2.key; +} + +inline bool operator==( const CondKey key1, const CondKey key2 ) +{ + return key1.key == key2.key; +} + +/* Increment. Needed only for ranges. */ +inline void CondKey::increment() +{ + key = key + 1; +} + + +/* Filter on the output stream that keeps track of the number of lines + * output. 
*/ +class output_filter +: + public std::filebuf +{ +public: + output_filter( const char *fileName ) + : + fileName(fileName), + line(1), + level(0), + indent(false), + singleIndent(false) + {} + + virtual int sync(); + virtual std::streamsize xsputn( const char* s, std::streamsize n ); + + std::streamsize countAndWrite( const char* s, std::streamsize n ); + /* State set up by the constructor: the fileName pointer is stored as given + * (the string is not copied), line starts at 1, level at 0 and both indent + * flags start out false. */ + const char *fileName; + int line; + int level; + bool indent; + bool singleIndent; +}; +/* Stream buffer that discards output: xsputn() reports all n characters as + * consumed and overflow() returns 1 (a non-EOF value) without storing + * anything. */ +class nullbuf +: + public std::streambuf +{ +public: + virtual std::streamsize xsputn( const char * s, std::streamsize n ) + { return n; } + + virtual int overflow( int c ) + { return 1; } +}; +/* Stream buffer that forwards to a C stdio FILE. The FILE and the file name + * are stored exactly as passed in; nothing in this class closes the FILE. */ +class cfilebuf : public std::streambuf +{ +public: + cfilebuf( char *fileName, FILE* file ) : fileName(fileName), file(file) { } + char *fileName; + FILE *file; + /* Flush the FILE. The fflush() result is ignored; always returns 0. */ + int sync() + { + fflush( file ); + return 0; + } + /* Write one character unless it is EOF. The fputc() result is not checked + * and 0 is returned in every case. */ + int overflow( int c ) + { + if ( c != EOF ) + fputc( c, file ); + return 0; + } + /* Bulk write through fwrite(); returns the count fwrite() reports. */ + std::streamsize xsputn( const char* s, std::streamsize n ) + { + std::streamsize written = fwrite( s, 1, n, file ); + return written; + } +}; +/* ostream layered on a cfilebuf. The destructor deletes the buffer object but + * does not close the underlying FILE; fclose() below does that explicitly. */ +class costream : public std::ostream +{ +public: + costream( cfilebuf *b ) : + std::ostream(b), b(b) {} + + ~costream() + { delete b; } + + void fclose() + { ::fclose( b->file ); } + + cfilebuf *b; +}; + + +const char *findFileExtension( const char *stemFile ); +const char *fileNameFromStem( const char *stemFile, const char *suffix ); +/* A name paired with a key. The constructor leaves prev/next unset; + * presumably they are the link fields maintained by the DList used for + * ExportList below -- TODO confirm against DList. */ +struct Export +{ + Export( std::string name, Key key ) + : name(name), key(key) {} + + std::string name; + Key key; + + Export *prev, *next; +}; + +typedef DList<Export> ExportList; + +struct exit_object { }; +extern exit_object endp; +void operator<<( std::ostream &out, exit_object & ); + +enum RagelFrontend +{ + KelbtBased, + ReduceBased +}; + +CodeGenData *makeCodeGen( const HostLang *hostLang, const CodeGenArgs &args ); +CodeGenData *makeCodeGenAsm( const HostLang *hostLang, const CodeGenArgs &args ); + +#endif diff --git a/libfsm/config.h.cmake.in 
b/libfsm/config.h.cmake.in new file mode 100644 index 00000000..ad4bf494 --- /dev/null +++ b/libfsm/config.h.cmake.in @@ -0,0 +1,13 @@ +/* config.h Generated from config.h.cmake.in by cmake */ + +#ifndef _COLM_CONFIG_H +#define _COLM_CONFIG_H + +#cmakedefine DEBUG 1 + +#cmakedefine HAVE_SYS_WAIT_H 1 + +#cmakedefine SIZEOF_INT @SIZEOF_INT@ +#cmakedefine SIZEOF_LONG @SIZEOF_LONG@ + +#endif /* _COLM_CONFIG_H */ diff --git a/libfsm/dot.cc b/libfsm/dot.cc new file mode 100644 index 00000000..edd4225b --- /dev/null +++ b/libfsm/dot.cc @@ -0,0 +1,399 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ragel.h" +#include "dot.h" +#include "gendata.h" +#include "inputdata.h" +#include "parsedata.h" + +using std::istream; +using std::ifstream; +using std::ostream; +using std::ios; +using std::cin; +using std::endl; + +/* Write one key into the label being built. When id->displayPrintables is set + * and the key is printable it is written as a quoted character (quote and + * backslash escaped, control characters spelled \a .. \r, space as SP). + * Otherwise the numeric value is written, as unsigned when the alphabet is + * not signed. */ +void GraphvizDotGen::key( Key key ) +{ + if ( id->displayPrintables && key.isPrintable() ) { + // Output values as characters, ensuring we escape the quote (") character + char cVal = (char) key.getVal(); + switch ( cVal ) { + case '"': case '\\': + out << "'\\" << cVal << "'"; + break; + case '\a': + out << "'\\\\a'"; + break; + case '\b': + out << "'\\\\b'"; + break; + case '\t': + out << "'\\\\t'"; + break; + case '\n': + out << "'\\\\n'"; + break; + case '\v': + out << "'\\\\v'"; + break; + case '\f': + out << "'\\\\f'"; + break; + case '\r': + out << "'\\\\r'"; + break; + case ' ': + out << "SP"; + break; + default: + out << "'" << cVal << "'"; + break; + } + } + else { + if ( keyOps->isSigned ) + out << key.getVal(); + else + out << (unsigned long) key.getVal(); + } +} + +/* Write "(c1, !c2, ...)" for a condition space: every condition in the set is + * named, prefixed with '!' when its bit in condVals is clear. Nothing is + * written when condSpace is null. */ +void GraphvizDotGen::condSpec( CondSpace *condSpace, long condVals ) +{ + if ( condSpace != 0 ) { + out << "("; + for ( CondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + /* Shift a long, not an int, so the mask has the width of condVals. */ + bool set = condVals & (1L << csi.pos()); + if ( !set ) + out << "!"; + (*csi)->actionName( out ); + if ( !csi.last() ) + out << ", "; + } + out << ")"; + } +} + +/* Write the key (or low..high range) of a transition followed by its + * condition spec, if any. */ +void GraphvizDotGen::onChar( Key lowKey, Key highKey, CondSpace *condSpace, long condVals ) +{ + /* Output the key. Possibly a range. */ + key( lowKey ); + if ( keyOps->ne( highKey, lowKey ) ) { + out << ".."; + key( highKey ); + } + + condSpec( condSpace, condVals ); +} + + +/* Write the from-state actions of fromState, comma separated and followed by + * " / ". Nothing is written when the state has no from-state actions. */ +void GraphvizDotGen::fromStateAction( StateAp *fromState ) +{ + int n = 0; + ActionTable *actionTables[3] = { 0, 0, 0 }; + + if ( fromState->fromStateActionTable.length() != 0 ) + actionTables[n++] = &fromState->fromStateActionTable; + + + /* Loop the existing actions and write out what's there. 
*/ + for ( int a = 0; a < n; a++ ) { + for ( ActionTable::Iter actIt = actionTables[a]->first(); actIt.lte(); actIt++ ) { + Action *action = actIt->value; + action->actionName( out ); + if ( a < n-1 || !actIt.last() ) + out << ", "; + } + } + + if ( n > 0 ) + out << " / "; +} + +/* Write " / " followed by the transition's own actions and then the to-state + * actions of its target, comma separated. Nothing is written when both tables + * are empty. */ +void GraphvizDotGen::transAction( StateAp *fromState, TransData *trans ) +{ + int n = 0; + ActionTable *actionTables[3] = { 0, 0, 0 }; + + if ( trans->actionTable.length() != 0 ) + actionTables[n++] = &trans->actionTable; + if ( trans->toState != 0 && trans->toState->toStateActionTable.length() != 0 ) + actionTables[n++] = &trans->toState->toStateActionTable; + + if ( n > 0 ) + out << " / "; + + /* Loop the existing actions and write out what's there. */ + for ( int a = 0; a < n; a++ ) { + for ( ActionTable::Iter actIt = actionTables[a]->first(); actIt.lte(); actIt++ ) { + Action *action = actIt->value; + action->actionName( out ); + if ( a < n-1 || !actIt.last() ) + out << ", "; + } + } +} + +/* Write " / " followed by the comma separated names in actionTable. The + * separator is written even when the table is empty. */ +void GraphvizDotGen::action( ActionTable *actionTable ) +{ + /* The action. */ + out << " / "; + for ( ActionTable::Iter actIt = actionTable->first(); actIt.lte(); actIt++ ) { + Action *action = actIt->value; + action->actionName( out ); + if ( !actIt.last() ) + out << ", "; + } +} + +/* Write one edge statement per outgoing transition of state: plain + * transitions, each condition of conditional transitions, and NFA (epsilon) + * transitions. Transitions without a target point at the err_<state> node. */ +void GraphvizDotGen::transList( StateAp *state ) +{ + /* Build the set of unique transitions out of this state. */ + RedTransSet stTransSet; + for ( TransList::Iter tel = state->outList; tel.lte(); tel++ ) { + if ( tel->plain() ) { + TransDataAp *tdap = tel->tdap(); + + /* Write out the from and to states. */ + out << "\t" << state->alg.stateNum << " -> "; + + if ( tdap->toState == 0 ) + out << "err_" << state->alg.stateNum; + else + out << tdap->toState->alg.stateNum; + + /* Begin the label. */ + out << " [ label = \""; + + fromStateAction( state ); + + onChar( tel->lowKey, tel->highKey, 0, 0 ); + + /* Write the action and close the transition. 
*/ + transAction( state, tdap ); + + out << "\" ];\n"; + } + else { + for ( CondList::Iter ctel = tel->tcap()->condList; ctel.lte(); ctel++ ) { + /* Write out the from and to states. */ + out << "\t" << state->alg.stateNum << " -> "; + + if ( ctel->toState == 0 ) + out << "err_" << state->alg.stateNum; + else + out << ctel->toState->alg.stateNum; + + /* Begin the label. */ + out << " [ label = \""; + + fromStateAction( state ); + + onChar( tel->lowKey, tel->highKey, tel->condSpace, ctel->key.getVal() ); + + /* Write the action and close the transition. */ + transAction( state, ctel ); + out << "\" ];\n"; + } + } + } + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter nfa = *state->nfaOut; nfa.lte(); nfa++ ) { + out << "\t" << state->alg.stateNum << + " -> " << nfa->toState->alg.stateNum << + " [ label = \"EP," << nfa->order << " "; + + fromStateAction( state ); + +// if ( nfa->popTest.length() > 0 || +// nfa->popAction.length() > 0 || +// nfa->popCondKeys.length() > 0 ) +// { +// out << " / "; +// } + + if ( nfa->popCondKeys.length() > 0 ) { + for ( CondKeySet::Iter key = nfa->popCondKeys; key.lte(); key++ ) { + out << "("; + long condVals = *key; + for ( CondSet::Iter csi = nfa->popCondSpace->condSet; csi.lte(); csi++ ) { + /* Shift a long, not an int, so the mask has the width of condVals. */ + bool set = condVals & (1L << csi.pos()); + if ( !set ) + out << "!"; + (*csi)->actionName( out ); + if ( !csi.last() ) + out << ", "; + } + out << ") "; + } + } + + if ( nfa->popAction.length() > 0 ) { + for ( ActionTable::Iter pa = nfa->popAction; pa.lte(); pa++ ) { + pa->value->actionName( out ); + if ( !pa.last() ) + out << ","; + } + } + + if ( nfa->popTest.length() > 0 ) { + for ( ActionTable::Iter pt = nfa->popTest; pt.lte(); pt++ ) { + pt->value->actionName( out ); + if ( !pt.last() ) + out << ","; + } + } + + /* End the statement with a newline, like every other edge. */ + out << "\" ];\n"; + } + } +} + +/* Append the underscore-joined chain of non-empty names from the root name + * instance down to nameInst onto res. Returns true if anything was appended. */ +bool GraphvizDotGen::makeNameInst( std::string &res, NameInst *nameInst ) +{ + bool written = false; + if ( nameInst->parent != 0 ) + written = makeNameInst( res, nameInst->parent ); + + if ( 
!nameInst->name.empty() ) { + if ( written ) + res += '_'; + res += nameInst->name; + written = true; + } + + return written; +} + +/* Write the whole machine as a Graphviz digraph named fsmName: pseudo nodes + * first, then final states, then every transition, entry edge and EOF edge. */ +void GraphvizDotGen::write( ) +{ + out << + "digraph " << fsmName << " {\n" + " rankdir=LR;\n"; + + /* Define the pseudo states. Transitions will be done after the states + * have been defined as either final or not final. */ + out << " node [ shape = point ];\n"; + + if ( fsm->startState != 0 ) + out << " ENTRY;\n"; + + /* Pseudo states for entry points in the entry map. */ + for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { + StateAp *state = en->value; + out << " en_" << state->alg.stateNum << ";\n"; + } + + /* Pseudo states for final states with eof actions. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + //if ( st->eofTrans != 0 && st->eofTrans->action != 0 ) + // out << " eof_" << st->id << ";\n"; + if ( st->eofActionTable.length() > 0 ) + out << " eof_" << st->alg.stateNum << ";\n"; + } + + out << " node [ shape = circle, height = 0.2 ];\n"; + + /* Pseudo states for states whose default actions go to error. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + bool needsErr = false; + for ( TransList::Iter tel = st->outList; tel.lte(); tel++ ) { + if ( tel->plain() ) { + if ( tel->tdap()->toState == 0 ) { + needsErr = true; + break; + } + } + else { + for ( CondList::Iter ctel = tel->tcap()->condList; ctel.lte(); ctel++ ) { + if ( ctel->toState == 0 ) { + needsErr = true; + break; + } + } + } + } + + if ( needsErr ) + out << " err_" << st->alg.stateNum << " [ label=\"\"];\n"; + } + + /* Attributes common to all nodes, plus double circle for final states. */ + out << " node [ fixedsize = true, height = 0.65, shape = doublecircle ];\n"; + + /* List Final states. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + if ( st->isFinState() ) + out << " " << st->alg.stateNum << ";\n"; + } + + /* List transitions. 
*/ + out << " node [ shape = circle ];\n"; + + /* Walk the states. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) + transList( st ); + + /* Transitions into the start state. */ + if ( fsm->startState != 0 ) + out << " ENTRY -> " << fsm->startState->alg.stateNum << " [ label = \"IN\" ];\n"; + + for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { + NameInst *nameInst = fsmCtx->nameIndex[en->key]; + std::string name; + makeNameInst( name, nameInst ); + StateAp *state = en->value; + out << " en_" << state->alg.stateNum << + " -> " << state->alg.stateNum << + " [ label = \"" << name << "\" ];\n"; + } + + /* Out action transitions. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + if ( st->eofActionTable.length() != 0 ) { + out << " " << st->alg.stateNum << " -> eof_" << + st->alg.stateNum << " [ label = \"EOF"; + + for ( CondKeySet::Iter i = st->outCondKeys; i.lte(); i++ ) { + if ( i.pos() > 0 ) + out << "|"; + condSpec( st->outCondSpace, *i ); + } + + action( &st->eofActionTable ); + out << "\" ];\n"; + } + } + + out << + "}\n"; +} + +/* Build a GraphvizDotGen for the parse data selected for dot output + * (dotGenPd) and write its graph to out. */ +void InputData::writeDot( ostream &out ) +{ + ParseData *pd = dotGenPd; + GraphvizDotGen dotGen( this, pd->fsmCtx, pd->sectionGraph, pd->sectionName, pd->machineId, out ); + dotGen.write(); +} diff --git a/libfsm/dot.h b/libfsm/dot.h new file mode 100644 index 00000000..13f53532 --- /dev/null +++ b/libfsm/dot.h @@ -0,0 +1,53 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _GVDOTGEN_H +#define _GVDOTGEN_H + +#include <iostream> +#include "gendata.h" + +/* Writes a state machine as a Graphviz dot graph. The machine, its context + * and its name are handed to the RedBase base class; the graph text goes to + * the stream given to the constructor, which is held by reference and so must + * outlive this object. */ +class GraphvizDotGen : public RedBase +{ +public: + GraphvizDotGen( FsmGbl *id, FsmCtx *fsmCtx, FsmAp *fsm, + std::string fsmName, int machineId, std::ostream &out ) + : + RedBase(id, fsmCtx, fsm, fsmName, machineId), + out(out) + {} + + bool makeNameInst( std::string &res, NameInst *nameInst ); + void action( ActionTable *actionTable ); + void transAction( StateAp *fromState, TransData *trans ); + void key( Key key ); + void condSpec( CondSpace *condSpace, long condVals ); + void onChar( Key lowKey, Key highKey, CondSpace *condSpace, long condVals ); + void transList( StateAp *state ); + void write(); + void fromStateAction( StateAp *fromState ); + + /* Qualified explicitly so this header does not depend on a + * using-declaration supplied by whoever includes it. */ + std::ostream &out; +}; + +#endif diff --git a/libfsm/flat.cc b/libfsm/flat.cc new file mode 100644 index 00000000..8cda30db --- /dev/null +++ b/libfsm/flat.cc @@ -0,0 +1,576 @@ +/* + * Copyright 2004-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to 
do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "flat.h" +#include "redfsm.h" +#include "gendata.h" +/* Prepare the reduced machine for flat code generation, run the table + * analysis pass, then leave the tables in generate mode. Returns early, + * before any analysis, when errors have been recorded. */ +void Flat::genAnalysis() +{ + redFsm->sortByStateId(); + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Do flat expand. */ + redFsm->makeFlatClass(); + + /* If any errors have occurred in the input file then don't write anything. */ + if ( red->id->errorCount > 0 ) + return; + + /* Analyze Machine will find the final action reference counts, among other + * things. We will use these in reporting the usage of fsm directives in + * action code. */ + red->analyzeMachine(); + + setKeyType(); + + /* Run the analysis pass over the table data. */ + setTableState( TableArray::AnalyzePass ); + tableDataPass(); + + /* Switch the tables over to the code gen mode. 
*/ + setTableState( TableArray::GeneratePass ); +} +/* Run every table builder once. Unlike writeData() the to-state, from-state, + * eof-action and eof-trans tables are built unconditionally here. */ +void Flat::tableDataPass() +{ + if ( type == Flat::Loop ) { + if ( redFsm->anyActions() ) + taActions(); + } + + taKeys(); + taCharClass(); + taFlatIndexOffset(); + + taIndices(); + taIndexDefaults(); + taTransCondSpaces(); + + if ( red->condSpaceList.length() > 0 ) + taTransOffsets(); + + taCondTargs(); + taCondActions(); + + taToStateActions(); + taFromStateActions(); + taEofConds(); + taEofActions(); + taEofTrans(); + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); +} +/* Run the table builders for output, skipping the optional tables the + * machine does not need, then emit the state ids. */ +void Flat::writeData() +{ + if ( type == Flat::Loop ) { + /* If there are any transition functions then output the array. If there + * are none, don't bother emitting an empty array that won't be used. */ + if ( redFsm->anyActions() ) + taActions(); + } + + taKeys(); + taCharClass(); + taFlatIndexOffset(); + + taIndices(); + taIndexDefaults(); + taTransCondSpaces(); + if ( red->condSpaceList.length() > 0 ) + taTransOffsets(); + taCondTargs(); + taCondActions(); + + if ( redFsm->anyToStateActions() ) + taToStateActions(); + + if ( redFsm->anyFromStateActions() ) + taFromStateActions(); + + taEofConds(); + + if ( redFsm->anyEofActions() ) + taEofActions(); + + if ( redFsm->anyEofTrans() ) + taEofTrans(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); + + STATE_IDS(); +} + +/* Give the key table the alphabet type and its signedness. */ +void Flat::setKeyType() +{ + transKeys.setType( ALPH_TYPE(), alphType->size, alphType->isChar ); + transKeys.isSigned = keyOps->isSigned; +} +/* Put every table array registered in arrayVector into the given state. */ +void Flat::setTableState( TableArray::State state ) +{ + for ( ArrayVector::Iter i = arrayVector; i.lte(); i++ ) { + TableArray *tableArray = *i; + tableArray->setState( state ); + } +} +/* Per state: the running offset of its first entry in the indices table. */ +void Flat::taFlatIndexOffset() +{ + flatIndexOffset.start(); + + int curIndOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the index offset. */ + flatIndexOffset.value( curIndOffset ); + + /* Move the index offset ahead. 
*/ + if ( st->transList != 0 ) + curIndOffset += ( st->high - st->low + 1 ); + } + + flatIndexOffset.finish(); +} +/* One class-map entry per key position between redFsm->lowKey and + * redFsm->highKey. The table is left empty when there is no class map. */ +void Flat::taCharClass() +{ + charClass.start(); + + if ( redFsm->classMap != 0 ) { + long long maxSpan = keyOps->span( redFsm->lowKey, redFsm->highKey ); + + for ( long long pos = 0; pos < maxSpan; pos++ ) + charClass.value( redFsm->classMap[pos] ); + } + + charClass.finish(); +} + +void Flat::taToStateActions() +{ + toStateActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write any to-state action. */ + TO_STATE_ACTION(st); + } + + toStateActions.finish(); +} + +void Flat::taFromStateActions() +{ + fromStateActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write any from-state action. */ + FROM_STATE_ACTION( st ); + } + + fromStateActions.finish(); +} + +void Flat::taEofActions() +{ + eofActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write any eof action. */ + EOF_ACTION( st ); + } + + eofActions.finish(); +} +/* Four tables describing each state's EOF conditions: cond space id (-1 for + * none), offset of its keys, number of keys, and the keys themselves. */ +void Flat::taEofConds() +{ + /* + * EOF Cond Spaces + */ + eofCondSpaces.start(); + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->outCondSpace != 0 ) + eofCondSpaces.value( st->outCondSpace->condSpaceId ); + else + eofCondSpaces.value( -1 ); + } + eofCondSpaces.finish(); + + /* + * EOF Cond Key Indexes + */ + eofCondKeyOffs.start(); + + int curOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long off = 0; + if ( st->outCondSpace != 0 ) { + off = curOffset; + curOffset += st->outCondKeys.length(); + } + eofCondKeyOffs.value( off ); + } + + eofCondKeyOffs.finish(); + + /* + * EOF Cond Key Lengths. 
+ */ + eofCondKeyLens.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long len = 0; + if ( st->outCondSpace != 0 ) + len = st->outCondKeys.length(); + eofCondKeyLens.value( len ); + } + + eofCondKeyLens.finish(); + + /* + * EOF Cond Keys + */ + eofCondKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->outCondSpace != 0 ) { + for ( int c = 0; c < st->outCondKeys.length(); c++ ) { + CondKey key = st->outCondKeys[c]; + eofCondKeys.value( key.getVal() ); + } + } + } + + eofCondKeys.finish(); +} +/* Per state: position of its EOF transition plus one, or 0 when the state + * has no EOF transition. */ +void Flat::taEofTrans() +{ + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + long *transPos = new long[redFsm->transSet.length()]; + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* Save the position. Needed for eofTargs. */ + RedTransAp *trans = transPtrs[t]; + transPos[trans->id] = t; + } + + eofTrans.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long trans = 0; + + if ( st->eofTrans != 0 ) + trans = transPos[st->eofTrans->id] + 1; + + eofTrans.value( trans ); + } + + eofTrans.finish(); + + delete[] transPtrs; + delete[] transPos; +} +/* Per state: a (low, high) pair bounding its transition list. */ +void Flat::taKeys() +{ + transKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->transList ) { + /* Emit just low key and high key. */ + transKeys.value( st->low ); + transKeys.value( st->high ); + } + else { + /* Emit an impossible range so the driver fails the lookup. 
*/ + transKeys.value( 1 ); + transKeys.value( 0 ); + } + } + + transKeys.finish(); +} +/* Per state with a transition list: the transition id for every position in + * low..high. */ +void Flat::taIndices() +{ + indices.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->transList != 0 ) { + long long span = st->high - st->low + 1; + for ( long long pos = 0; pos < span; pos++ ) + indices.value( st->transList[pos]->id ); + } + } + + indices.finish(); +} +/* Per state: the id of its default transition, or 0 when it has none. */ +void Flat::taIndexDefaults() +{ + indexDefaults.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) + indexDefaults.value( st->defTrans->id ); + else + indexDefaults.value( 0 ); + } + + indexDefaults.finish(); +} + +/* Per transition, in id order: its cond space id, or -1 when it has none. */ +void Flat::taTransCondSpaces() +{ + transCondSpaces.start(); + + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + transPtrs[trans->id] = trans; + } + + /* Walk the transitions in id order. */ + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* The transition whose id is t. */ + RedTransAp *trans = transPtrs[t]; + + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + delete[] transPtrs; + + transCondSpaces.finish(); +} +/* Per transition, in id order: the running sum of condFullSize() over the + * transitions before it. */ +void Flat::taTransOffsets() +{ + transOffsets.start(); + + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + /* Keep a count of the num of items in the array written. */ + int curOffset = 0; + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* The transition whose id is t. 
*/ + RedTransAp *trans = transPtrs[t]; + + transOffsets.value( curOffset ); + + curOffset += trans->condFullSize(); + } + + delete[] transPtrs; + + transOffsets.finish(); +} +/* Per transition, in id order: one target state id for every slot of its + * full condition table. Slots with no explicit condition use errCond(). */ +void Flat::taCondTargs() +{ + condTargs.start(); + + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + /* Walk the transitions in id order. */ + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* The transition whose id is t. */ + RedTransAp *trans = transPtrs[t]; + + long fullSize = trans->condFullSize(); + RedCondPair **fullPairs = new RedCondPair*[fullSize]; + for ( long k = 0; k < fullSize; k++ ) + fullPairs[k] = trans->errCond(); + + for ( int c = 0; c < trans->numConds(); c++ ) + fullPairs[trans->outCondKey( c ).getVal()] = trans->outCond( c ); + + for ( int k = 0; k < fullSize; k++ ) { + RedCondPair *cond = fullPairs[k]; + condTargs.value( cond->targ->id ); + } + + delete[] fullPairs; + } + delete[] transPtrs; + + condTargs.finish(); +} +/* Same walk as taCondTargs(), emitting each slot's action via COND_ACTION. */ +void Flat::taCondActions() +{ + condActions.start(); + + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + /* Walk the transitions in id order. */ + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* The transition whose id is t. 
*/ + RedTransAp *trans = transPtrs[t]; + + long fullSize = trans->condFullSize(); + RedCondPair **fullPairs = new RedCondPair*[fullSize]; + for ( long k = 0; k < fullSize; k++ ) + fullPairs[k] = trans->errCond(); + + for ( int c = 0; c < trans->numConds(); c++ ) + fullPairs[trans->outCondKey( c ).getVal()] = trans->outCond( c ); + + for ( int k = 0; k < fullSize; k++ ) { + RedCondPair *cond = fullPairs[k]; + COND_ACTION( cond ); + } + delete[] fullPairs; + } + delete[] transPtrs; + + condActions.finish(); +} + +/* Write out the array of actions. */ +void Flat::taActions() +{ + actions.start(); + + /* Add in the empty actions array. */ + actions.value( 0 ); + + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Length first. */ + actions.value( act->key.length() ); + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) + actions.value( item->value->actionId ); + } + + actions.finish(); +} +/* A leading filler entry, then for every state with NFA targets: the count + * followed by the target state ids. */ +void Flat::taNfaTargs() +{ + nfaTargs.start(); + + /* Offset of zero means no NFA targs, put a filler there. */ + nfaTargs.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaTargs.value( st->nfaTargs->length() ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + nfaTargs.value( targ->state->id ); + } + } + + nfaTargs.finish(); +} + +/* These need to mirror nfa targs. 
*/ +void Flat::taNfaPushActions() +{ + nfaPushActions.start(); + + nfaPushActions.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaPushActions.value( 0 ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_PUSH_ACTION( targ ); + } + } + + nfaPushActions.finish(); +} +/* Same layout as taNfaTargs(): a filler entry, then per state a 0 in the + * count slot followed by one NFA_POP_TEST entry per target. */ +void Flat::taNfaPopTrans() +{ + nfaPopTrans.start(); + + nfaPopTrans.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + + nfaPopTrans.value( 0 ); + + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_POP_TEST( targ ); + } + } + + nfaPopTrans.finish(); +} + +/* Per state: 0 when it has no NFA targets, otherwise the offset of its count + * entry in the NFA tables. */ +void Flat::taNfaOffsets() +{ + nfaOffsets.start(); + + /* Offset of zero means no NFA targs, real targs start at 1. */ + long offset = 1; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs == 0 ) { + nfaOffsets.value( 0 ); + } + else { + nfaOffsets.value( offset ); + offset += 1 + st->nfaTargs->length(); + } + } + + nfaOffsets.finish(); +} + + + + + + + + diff --git a/libfsm/flat.h b/libfsm/flat.h new file mode 100644 index 00000000..1e54f5ab --- /dev/null +++ b/libfsm/flat.h @@ -0,0 +1,94 @@ +/* + * Copyright 2004-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _C_FLAT_H +#define _C_FLAT_H + +#include <iostream> +#include "codegen.h" +#include "tables.h" + +/* Forwards. */ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; + +class Flat + : public virtual Tables +{ +protected: + enum Type { + Loop = 1, Exp + }; + +public: + Flat( const CodeGenArgs &args, Type type ) + : + Tables( args ), + type(type) + {} + + virtual ~Flat() { } + +protected: + Type type; + + void taKeys(); + void taKeySpans(); + void taCharClass(); + void taActions(); + void taFlatIndexOffset(); + void taIndices(); + void taIndexDefaults(); + void taTransCondSpaces(); + void taTransOffsets(); + void taCondTargs(); + void taCondActions(); + void taToStateActions(); + void taFromStateActions(); + void taEofActions(); + void taEofTrans(); + void taEofConds(); + void taNfaTargs(); + void taNfaOffsets(); + void taNfaPushActions(); + void taNfaPopTrans(); + + void setKeyType(); + + std::ostream &INDICES(); + std::ostream &TRANS_COND_SPACES(); + std::ostream &TRANS_OFFSETS(); + std::ostream &TRANS_LENGTHS(); + std::ostream &COND_KEYS(); + std::ostream &COND_TARGS(); + std::ostream &COND_ACTIONS(); + + virtual void setTableState( TableArray::State ); + + virtual void genAnalysis(); + virtual void tableDataPass(); + virtual void writeData(); +}; + +#endif diff --git a/libfsm/flatbreak.cc b/libfsm/flatbreak.cc new file mode 100644 index 00000000..08342625 --- /dev/null +++ b/libfsm/flatbreak.cc @@ -0,0 +1,118 @@ +/* + * Copyright 
2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "flatbreak.h" + +void FlatBreak::LOCATE_TRANS() +{ + if ( redFsm->classMap == 0 ) { + out << + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << "[" << vCS() << "]" << ";\n"; + } + else { + long lowKey = redFsm->lowKey.getVal(); + long highKey = redFsm->highKey.getVal(); + + bool limitLow = keyOps->eq( lowKey, keyOps->minKey ); + bool limitHigh = keyOps->eq( highKey, keyOps->maxKey ); + + out << + " " << keys << " = " << OFFSET( ARR_REF( transKeys ), "(" + vCS() + "<<1)" ) << ";\n" + " " << inds << " = " << OFFSET( ARR_REF( indices ), + ARR_REF( flatIndexOffset ) + "[" + vCS() + "]" ) << ";\n" + "\n"; + + if ( !limitLow || !limitHigh ) { + out << " if ( "; + + if ( !limitHigh ) + out << GET_KEY() << " <= " << highKey; + + if ( !limitHigh && !limitLow ) + out << " && "; + + if ( !limitLow ) + out << GET_KEY() << " >= " << lowKey; + + out << " ) {\n"; + } + + out << + " " << ic << " = " << CAST("int") << ARR_REF( charClass ) << "[" << CAST("int") << GET_KEY() << + " - " << lowKey << "];\n" + " if ( " << ic << " <= " << CAST("int") << DEREF( ARR_REF( transKeys ), keys.ref() + "+1" ) << " && " << + "" << ic << " >= " << CAST("int") << DEREF( ARR_REF( transKeys ), keys.ref() + "" ) << " )\n" + " " << trans << " = " << CAST(UINT()) << DEREF( ARR_REF( indices ), + inds.ref() + " + " + CAST("int") + "( " + ic.ref() + " - " + CAST("int") + + DEREF( ARR_REF( transKeys ), keys.ref() + "" ) + " ) " ) << "; \n" + " else\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << + "[" << vCS() << "]" << ";\n"; + + if ( !limitLow || !limitHigh ) { + out << + " }\n" + " else {\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << "[" << vCS() << "]" << ";\n" + " }\n" + "\n"; + } + } + + +} + +void FlatBreak::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + out << + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 
0;\n"; + + out << + " switch ( " << ARR_REF( transCondSpaces ) << "[" << trans << "] ) {\n" + "\n"; + + for ( CondSpaceList::Iter csi = red->condSpaceList; csi.lte(); csi++ ) { + GenCondSpace *condSpace = csi; + if ( condSpace->numTransRefs > 0 ) { + out << " " << CASE( STR(condSpace->condSpaceId) ) << " {\n"; + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = (1 << csi.pos()); + out << " ) " << cpc << " += " << condValOffset << ";\n"; + } + + out << + " " << CEND() << "\n}\n"; + } + } + + out << + " }\n" + " " << cond << " += " << CAST( UINT() ) << "" << cpc << ";\n"; + } +} diff --git a/libfsm/flatbreak.h b/libfsm/flatbreak.h new file mode 100644 index 00000000..23400000 --- /dev/null +++ b/libfsm/flatbreak.h @@ -0,0 +1,72 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAGEL_FLATBREAK_H +#define RAGEL_FLATBREAK_H + +#include "flat.h" +#include "actloop.h" +#include "actexp.h" + +struct FlatBreak +: + public Flat, public TabBreak +{ + FlatBreak( const CodeGenArgs &args, Flat::Type type ) + : + Tables( args ), + Flat( args, type ), + TabBreak( args ) + {} + + void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class FlatBreakLoop + : public FlatBreak, public ActLoop +{ +public: + FlatBreakLoop( const CodeGenArgs &args ) + : + Tables( args ), + FlatBreak( args, Flat::Loop ), + ActLoop( args ) + {} +}; + +/* + * FlatBreakExp + */ +class FlatBreakExp + : public FlatBreak, public ActExp +{ +public: + FlatBreakExp( const CodeGenArgs &args ) + : + Tables( args ), + FlatBreak( args, Flat::Exp ), + ActExp( args ) + {} +}; + +#endif diff --git a/libfsm/flatgoto.cc b/libfsm/flatgoto.cc new file mode 100644 index 00000000..c3206191 --- /dev/null +++ b/libfsm/flatgoto.cc @@ -0,0 +1,118 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "flatgoto.h" + +void FlatGoto::LOCATE_TRANS() +{ + if ( redFsm->classMap == 0 ) { + out << + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << "[" << vCS() << "]" << ";\n"; + } + else { + long lowKey = redFsm->lowKey.getVal(); + long highKey = redFsm->highKey.getVal(); + + bool limitLow = keyOps->eq( lowKey, keyOps->minKey ); + bool limitHigh = keyOps->eq( highKey, keyOps->maxKey ); + + out << + " " << keys << " = " << OFFSET( ARR_REF( transKeys ), "(" + vCS() + "<<1)" ) << ";\n" + " " << inds << " = " << OFFSET( ARR_REF( indices ), + ARR_REF( flatIndexOffset ) + "[" + vCS() + "]" ) << ";\n" + "\n"; + + if ( !limitLow || !limitHigh ) { + out << " if ( "; + + if ( !limitHigh ) + out << GET_KEY() << " <= " << highKey; + + if ( !limitHigh && !limitLow ) + out << " && "; + + if ( !limitLow ) + out << GET_KEY() << " >= " << lowKey; + + out << " ) {\n"; + } + + out << + " " << ic << " = " << CAST("int") << ARR_REF( charClass ) << "[" << CAST("int") << GET_KEY() << + " - " << lowKey << "];\n" + " if ( " << ic << " <= " << CAST("int") << DEREF( ARR_REF( transKeys ), keys.ref() + "+1" ) << " && " << + "" << ic << " >= " << CAST("int") << DEREF( ARR_REF( transKeys ), keys.ref() + "" ) << " )\n" + " " << trans << " = " << CAST(UINT()) << DEREF( ARR_REF( indices ), + inds.ref() + " + " + CAST("int") + "( " + ic.ref() + " - " + CAST("int") + + DEREF( ARR_REF( transKeys ), keys.ref() + "" ) + " ) " ) << "; \n" + " else\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << + "[" << vCS() << "]" << ";\n"; + + if ( !limitLow || !limitHigh ) { + out << + " }\n" + " else {\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << "[" << vCS() 
<< "]" << ";\n" + " }\n" + "\n"; + } + } + +} + + +void FlatGoto::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + out << + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + out << + " switch ( " << ARR_REF( transCondSpaces ) << "[" << trans << "] ) {\n" + "\n"; + + for ( CondSpaceList::Iter csi = red->condSpaceList; csi.lte(); csi++ ) { + GenCondSpace *condSpace = csi; + if ( condSpace->numTransRefs > 0 ) { + out << " " << CASE( STR(condSpace->condSpaceId) ) << " {\n"; + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = (1 << csi.pos()); + out << " ) " << cpc << " += " << condValOffset << ";\n"; + } + + out << + " " << CEND() << "\n}\n"; + } + } + + out << + " }\n" + " " << cond << " += " << CAST( UINT() ) << "" << cpc << ";\n"; + } +} diff --git a/libfsm/flatgoto.h b/libfsm/flatgoto.h new file mode 100644 index 00000000..e21b6cd9 --- /dev/null +++ b/libfsm/flatgoto.h @@ -0,0 +1,72 @@ +/* + * Copyright 2018-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAGEL_FLATGOTO_H +#define RAGEL_FLATGOTO_H + +#include "flat.h" +#include "actloop.h" +#include "actexp.h" + +struct FlatGoto +: + public Flat, public TabGoto +{ + FlatGoto( const CodeGenArgs &args, Flat::Type type ) + : + Tables( args ), + Flat( args, type ), + TabGoto( args ) + {} + + void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class FlatGotoLoop + : public FlatGoto, public ActLoop +{ +public: + FlatGotoLoop( const CodeGenArgs &args ) + : + Tables( args ), + FlatGoto( args, Flat::Loop ), + ActLoop( args ) + {} +}; + +/* + * FlatGotoExp + */ +class FlatGotoExp + : public FlatGoto, public ActExp +{ +public: + FlatGotoExp( const CodeGenArgs &args ) + : + Tables( args ), + FlatGoto( args, Flat::Exp ), + ActExp( args ) + {} +}; + +#endif diff --git a/libfsm/flatvar.cc b/libfsm/flatvar.cc new file mode 100644 index 00000000..b747afcd --- /dev/null +++ b/libfsm/flatvar.cc @@ -0,0 +1,119 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "flatvar.h" + +#include "parsedata.h" +#include "inputdata.h" + +void FlatVar::LOCATE_TRANS() +{ + if ( redFsm->classMap == 0 ) { + out << + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << "[" << vCS() << "]" << ";\n"; + } + else { + long lowKey = redFsm->lowKey.getVal(); + long highKey = redFsm->highKey.getVal(); + + bool limitLow = keyOps->eq( lowKey, keyOps->minKey ); + bool limitHigh = keyOps->eq( highKey, keyOps->maxKey ); + + out << + " " << keys << " = " << OFFSET( ARR_REF( transKeys ), "(" + vCS() + "<<1)" ) << ";\n" + " " << inds << " = " << OFFSET( ARR_REF( indices ), + ARR_REF( flatIndexOffset ) + "[" + vCS() + "]" ) << ";\n" + "\n"; + + if ( !limitLow || !limitHigh ) { + out << " if ( "; + + if ( !limitHigh ) + out << GET_KEY() << " <= " << highKey; + + if ( !limitHigh && !limitLow ) + out << " && "; + + if ( !limitLow ) + out << GET_KEY() << " >= " << lowKey; + + out << " ) {\n"; + } + + out << + " " << ic << " = " << CAST("int") << ARR_REF( charClass ) << "[" << CAST("int") << GET_KEY() << + " - " << lowKey << "];\n" + " if ( " << ic << " <= " << CAST("int") << DEREF( ARR_REF( transKeys ), keys.ref() + "+1" ) << " && " << + "" << ic << " >= " << CAST("int") << DEREF( ARR_REF( transKeys ), keys.ref() + "" ) << " )\n" + " " << trans << " = " << CAST(UINT()) << DEREF( ARR_REF( indices ), + inds.ref() + " + " + CAST("int") + "( " + ic.ref() + " - " + CAST("int") + + DEREF( ARR_REF( transKeys ), keys.ref() + "" ) + " ) " ) << "; \n" + 
" else\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << + "[" << vCS() << "]" << ";\n"; + + if ( !limitLow || !limitHigh ) { + out << + " }\n" + " else {\n" + " " << trans << " = " << CAST(UINT()) << ARR_REF( indexDefaults ) << "[" << vCS() << "]" << ";\n" + " }\n" + "\n"; + } + } +} + +void FlatVar::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + out << + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + out << + " switch ( " << ARR_REF( transCondSpaces ) << "[" << trans << "] ) {\n" + "\n"; + + for ( CondSpaceList::Iter csi = red->condSpaceList; csi.lte(); csi++ ) { + GenCondSpace *condSpace = csi; + if ( condSpace->numTransRefs > 0 ) { + out << " " << CASE( STR(condSpace->condSpaceId) ) << " {\n"; + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = (1 << csi.pos()); + out << " ) " << cpc << " += " << condValOffset << ";\n"; + } + + out << + " " << CEND() << "\n}\n"; + } + } + + out << + " }\n" + " " << cond << " += " << CAST( UINT() ) << "" << cpc << ";\n"; + } +} diff --git a/libfsm/flatvar.h b/libfsm/flatvar.h new file mode 100644 index 00000000..9cd80eab --- /dev/null +++ b/libfsm/flatvar.h @@ -0,0 +1,70 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of 
the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAGEL_FLATVAR_H +#define RAGEL_FLATVAR_H + +#include "flat.h" +#include "actloop.h" +#include "actexp.h" + +struct FlatVar +: + public Flat, public TabVar +{ + FlatVar( const CodeGenArgs &args, Flat::Type type ) + : + Tables( args ), + Flat( args, type ), + TabVar( args ) + {} + + void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class FlatVarLoop + : public FlatVar, public ActLoop +{ +public: + FlatVarLoop( const CodeGenArgs &args ) + : + Tables( args ), + FlatVar( args, Flat::Loop ), + ActLoop( args ) + {} +}; + +class FlatVarExp +: + public FlatVar, public ActExp +{ +public: + FlatVarExp( const CodeGenArgs &args ) + : + Tables( args ), + FlatVar( args, Flat::Exp ), + ActExp( args ) + {} +}; + +#endif diff --git a/libfsm/fsmap.cc b/libfsm/fsmap.cc new file mode 100644 index 00000000..e38680f3 --- /dev/null +++ b/libfsm/fsmap.cc @@ -0,0 +1,1200 @@ +/* + * Copyright 2002-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * 
copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "fsmgraph.h" +#include <iostream> +using std::endl; + +/* Insert an action into an action table. */ +void ActionTable::setAction( int ordering, Action *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. */ + insertMulti( ordering, action ); +} + +/* Set all the action from another action table in this table. */ +void ActionTable::setActions( const ActionTable &other ) +{ + for ( ActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} + +void ActionTable::setActions( int *orderings, Action **actions, int nActs ) +{ + for ( int a = 0; a < nActs; a++ ) + insertMulti( orderings[a], actions[a] ); +} + +bool ActionTable::hasAction( Action *action ) +{ + for ( int a = 0; a < length(); a++ ) { + if ( data[a].value == action ) + return true; + } + return false; +} + +/* Insert an action into an action table. */ +void LmActionTable::setAction( int ordering, LongestMatchPart *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. */ + insertMulti( ordering, action ); +} + +/* Set all the action from another action table in this table. 
*/ +void LmActionTable::setActions( const LmActionTable &other ) +{ + for ( LmActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} + +void ErrActionTable::setAction( int ordering, Action *action, int transferPoint ) +{ + insertMulti( ErrActionTableEl( action, ordering, transferPoint ) ); +} + +void ErrActionTable::setActions( const ErrActionTable &other ) +{ + for ( ErrActionTable::Iter act = other; act.lte(); act++ ) + insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) ); +} + +/* Insert a priority into this priority table. Looks out for priorities on + * duplicate keys. */ +void PriorTable::setPrior( int ordering, PriorDesc *desc ) +{ + PriorEl *lastHit = 0; + PriorEl *insed = insert( PriorEl(ordering, desc), &lastHit ); + if ( insed == 0 ) { + /* This already has a priority on the same key as desc. Overwrite the + * priority if the ordering is larger (later in time). */ + if ( ordering >= lastHit->ordering ) + *lastHit = PriorEl( ordering, desc ); + } +} + +/* Set all the priorities from a priorTable in this table. */ +void PriorTable::setPriors( const PriorTable &other ) +{ + /* Loop src priorities once to overwrite duplicates. */ + PriorTable::Iter priorIt = other; + for ( ; priorIt.lte(); priorIt++ ) + setPrior( priorIt->ordering, priorIt->desc ); +} + +/* Set the priority of starting transitions. Isolates the start state so it has + * no other entry points, then sets the priorities of all the transitions out + * of the start state. If the start state is final, then the outPrior of the + * start state is also set. The idea is that a machine that accepts the null + * string can still specify the starting trans prior for when it accepts the + * null word. */ +void FsmAp::startFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + /* Walk all transitions out of the start state. 
*/ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + trans->tdap()->priorTable.setPrior( ordering, prior ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + cond->priorTable.setPrior( ordering, prior ); + } + } + } + + if ( startState->nfaOut != 0 ) { + for ( NfaTransList::Iter na = *startState->nfaOut; na.lte(); na++ ) + na->priorTable.setPrior( ordering, prior ); + } + + /* If the new start state is final then set the out priority. This follows + * the same convention as setting start action in the out action table of + * a final start state. */ + if ( startState->stateBits & STB_ISFINAL ) + startState->outPriorTable.setPrior( ordering, prior ); + + /* Start fsm priorities are a special case that may require + * minimization afterwards. */ + afterOpMinimize( true ); +} + +/* Set the priority of all transitions in a graph. Walks all transition lists + * and all def transitions. */ +void FsmAp::allTransPrior( int ordering, PriorDesc *prior ) +{ + /* Walk the list of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + trans->tdap()->priorTable.setPrior( ordering, prior ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + cond->priorTable.setPrior( ordering, prior ); + } + } + } + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter na = *state->nfaOut; na.lte(); na++ ) + na->priorTable.setPrior( ordering, prior ); + } + } +} + +/* Set the priority of all transitions that go into a final state. 
Note that if + * any entry states are final, we will not be setting the priority of any + * transitions that may go into those states in the future. The graph does not + * support pending in transitions in the same way pending out transitions are + * supported. */ +void FsmAp::finishFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk all in transitions of the final state. */ + for ( TransInList::Iter t = (*state)->inTrans; t.lte(); t++ ) + t->priorTable.setPrior( ordering, prior ); + for ( CondInList::Iter t = (*state)->inCond; t.lte(); t++ ) + t->priorTable.setPrior( ordering, prior ); + + if ( (*state)->nfaIn != 0 ) { + for ( NfaInList::Iter na = *(*state)->nfaIn; na.lte(); na++ ) + na->priorTable.setPrior( ordering, prior ); + } + } +} + +/* Set the priority of any future out transitions that may be made going out of + * this state machine. */ +void FsmAp::leaveFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Set priority in all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outPriorTable.setPrior( ordering, prior ); +} + + +/* Set actions to execute on starting transitions. Isolates the start state + * so it has no other entry points, then adds to the transition functions + * of all the transitions out of the start state. If the start state is final, + * then the func is also added to the start state's out func list. The idea is + * that a machine that accepts the null string can execute a start func when it + * matches the null word, which can only be done when leaving the start/final + * state. */ +void FsmAp::startFsmAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + /* Walk the start state's transitions, setting functions. 
*/ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + trans->tdap()->actionTable.setAction( ordering, action ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + cond->actionTable.setAction( ordering, action ); + } + } + } + + /* If start state is final then add the action to the out action table. + * This means that when the null string is accepted the start action will + * not be bypassed. */ + if ( startState->stateBits & STB_ISFINAL ) + startState->outActionTable.setAction( ordering, action ); + + if ( startState->nfaOut != 0 ) { + for ( NfaTransList::Iter na = *startState->nfaOut; na.lte(); na++ ) { + + StateAp *state = na->toState; + + /* Walk the start state's transitions, setting functions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + trans->tdap()->actionTable.setAction( ordering, action ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + cond->actionTable.setAction( ordering, action ); + } + } + } + + /* If start state is final then add the action to the out action table. + * This means that when the null string is accepted the start action will + * not be bypassed. */ + if ( state->stateBits & STB_ISFINAL ) + state->outActionTable.setAction( ordering, action ); + + } + } + + afterOpMinimize( true ); +} + +/* Set functions to execute on all transitions. Walks the out lists of all + * states. */ +void FsmAp::allTransAction( int ordering, Action *action ) +{ + /* Walk all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + trans->tdap()->actionTable.setAction( ordering, action ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + cond->actionTable.setAction( ordering, action ); + } + } + } + } +} + +/* Specify functions to execute upon entering final states. If the start state + * is final we can't really specify a function to execute upon entering that + * final state the first time. So function really means whenever entering a + * final state from within the same fsm. */ +void FsmAp::finishFsmAction( int ordering, Action *action ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk the final state's in list. */ + for ( TransInList::Iter t = (*state)->inTrans; t.lte(); t++ ) + t->actionTable.setAction( ordering, action ); + for ( CondInList::Iter t = (*state)->inCond; t.lte(); t++ ) + t->actionTable.setAction( ordering, action ); + } +} + +/* Add functions to any future out transitions that may be made going out of + * this state machine. */ +void FsmAp::leaveFsmAction( int ordering, Action *action ) +{ + /* Insert the action in the outActionTable of all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outActionTable.setAction( ordering, action ); +} + +/* Add functions to the longest match action table for constructing scanners. */ +void FsmAp::longMatchAction( int ordering, LongestMatchPart *lmPart ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk the final state's in list. 
*/ + for ( TransInList::Iter t = (*state)->inTrans; t.lte(); t++ ) + t->lmActionTable.setAction( ordering, lmPart ); + for ( CondInList::Iter t = (*state)->inCond; t.lte(); t++ ) + t->lmActionTable.setAction( ordering, lmPart ); + } +} + +void FsmAp::fillGaps( StateAp *state ) +{ + /* + * First pass fills in the the caps between transitions. + */ + if ( state->outList.length() == 0 ) { + /* Add the range on the lower and upper bound. */ + attachNewTrans( state, 0, ctx->keyOps->minKey, ctx->keyOps->maxKey ); + } + else { + TransList srcList; + srcList.transfer( state->outList ); + + /* Check for a gap at the beginning. */ + TransList::Iter trans = srcList, next; + if ( ctx->keyOps->lt( ctx->keyOps->minKey, trans->lowKey ) ) { + /* Make the high key and append. */ + Key highKey = trans->lowKey; + ctx->keyOps->decrement( highKey ); + + attachNewTrans( state, 0, ctx->keyOps->minKey, highKey ); + } + + /* Write the transition. */ + next = trans.next(); + state->outList.append( trans ); + + /* Keep the last high end. */ + Key lastHigh = trans->highKey; + + /* Loop each source range. */ + for ( trans = next; trans.lte(); trans = next ) { + /* Make the next key following the last range. */ + Key nextKey = lastHigh; + ctx->keyOps->increment( nextKey ); + + /* Check for a gap from last up to here. */ + if ( ctx->keyOps->lt( nextKey, trans->lowKey ) ) { + /* Make the high end of the range that fills the gap. */ + Key highKey = trans->lowKey; + ctx->keyOps->decrement( highKey ); + + attachNewTrans( state, 0, nextKey, highKey ); + } + + /* Reduce the transition. If it reduced to anything then add it. */ + next = trans.next(); + state->outList.append( trans ); + + /* Keep the last high end. */ + lastHigh = trans->highKey; + } + + /* Now check for a gap on the end to fill. */ + if ( ctx->keyOps->lt( lastHigh, ctx->keyOps->maxKey ) ) { + /* Get a copy of the default. 
*/ + ctx->keyOps->increment( lastHigh ); + + attachNewTrans( state, 0, lastHigh, ctx->keyOps->maxKey ); + } + } + + /* + * Second pass fills in gaps in condition lists. + */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) + continue; + + CondList srcList; + srcList.transfer( trans->tcap()->condList ); + + CondList::Iter cond = srcList, next; + + /* Check for gap at the beginning. */ + if ( cond->key > 0 ) { + for ( CondKey key = 0; key < cond->key; key.increment() ) + attachNewCond( trans, state, 0, key ); + } + + next = cond.next(); + trans->tcap()->condList.append( cond ); + + CondKey lastKey = cond->key; + + for ( cond = next; cond.lte(); cond = next ) { + /* Make the next key following the last range. */ + CondKey nextKey = lastKey; + nextKey.increment(); + + /* Check for a gap from last up to here. */ + if ( nextKey < cond->key ) { + for ( CondKey key = nextKey; key < cond->key; key.increment() ) + attachNewCond( trans, state, 0, key ); + } + + next = cond.next(); + trans->tcap()->condList.append( cond ); + + lastKey = cond->key; + } + + CondKey high = (trans->condSpace == 0) ? + 0 : (1 << trans->condSpace->condSet.length()); + + /* Now check for a gap on the end to fill. */ + if ( lastKey < high ) { + /* Get a copy of the default. */ + lastKey.increment(); + + for ( CondKey key = lastKey; key < high; key.increment() ) + attachNewCond( trans, state, 0, key ); + } + } +} + +void FsmAp::setErrorActions( StateAp *state, const ActionTable &other ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set error transitions in the transitions that go to error. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState == 0 ) + trans->tdap()->actionTable.setActions( other ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState == 0 ) + cond->actionTable.setActions( other ); + } + } + } +} + +void FsmAp::setErrorAction( StateAp *state, int ordering, Action *action ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set error transitions in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState == 0 ) + trans->tdap()->actionTable.setAction( ordering, action ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState == 0 ) + cond->actionTable.setAction( ordering, action ); + } + } + } +} + + +/* Give a target state for error transitions. */ +void FsmAp::setErrorTarget( StateAp *state, StateAp *target, int *orderings, + Action **actions, int nActs ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set error target in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState == 0 ) { + /* The trans goes to error, redirect it. */ + redirectErrorTrans( trans->tdap()->fromState, target, trans->tdap() ); + trans->tdap()->actionTable.setActions( orderings, actions, nActs ); + } + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState == 0 ) { + /* The trans goes to error, redirect it. 
*/ + redirectErrorTrans( cond->fromState, target, cond ); + cond->actionTable.setActions( orderings, actions, nActs ); + } + } + } + } +} + +void FsmAp::transferOutActions( StateAp *state ) +{ + for ( ActionTable::Iter act = state->outActionTable; act.lte(); act++ ) + state->eofActionTable.setAction( act->key, act->value ); + state->outActionTable.empty(); +} + +void FsmAp::transferErrorActions( StateAp *state, int transferPoint ) +{ + for ( int i = 0; i < state->errActionTable.length(); ) { + ErrActionTableEl *act = state->errActionTable.data + i; + if ( act->transferPoint == transferPoint ) { + /* Transfer the error action and remove it. */ + setErrorAction( state, act->ordering, act->action ); + if ( ! state->isFinState() ) + state->eofActionTable.setAction( act->ordering, act->action ); + state->errActionTable.vremove( i ); + } + else { + /* Not transfering and deleting, skip over the item. */ + i += 1; + } + } +} + +/* Set error actions in the start state. */ +void FsmAp::startErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + /* Add the actions. */ + startState->errActionTable.setAction( ordering, action, transferPoint ); + + afterOpMinimize( true ); +} + +/* Set error actions in all states where there is a transition out. */ +void FsmAp::allErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Insert actions in the error action table of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->errActionTable.setAction( ordering, action, transferPoint ); +} + +/* Set error actions in final states. */ +void FsmAp::finalErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->errActionTable.setAction( ordering, action, transferPoint ); +} + +void FsmAp::notStartErrorAction( int ordering, Action *action, int transferPoint ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +void FsmAp::notFinalErrorAction( int ordering, Action *action, int transferPoint ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set error actions in states that are not final and not the start state. */ +void FsmAp::middleErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Set the action in all states that are neither the start state nor a + * final state. The start state is not isolated here. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set EOF actions in the start state. */ +void FsmAp::startEOFAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + /* Add the actions. */ + startState->eofActionTable.setAction( ordering, action ); + + afterOpMinimize( true ); +} + +/* Set EOF actions in all states where there is a transition out. */ +void FsmAp::allEOFAction( int ordering, Action *action ) +{ + /* Insert actions in the EOF action table of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->eofActionTable.setAction( ordering, action ); +} + +/* Set EOF actions in final states. */ +void FsmAp::finalEOFAction( int ordering, Action *action ) +{ + /* Add the action to the EOF action table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->eofActionTable.setAction( ordering, action ); +} + +void FsmAp::notStartEOFAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->eofActionTable.setAction( ordering, action ); + } +} + +void FsmAp::notFinalEOFAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* Set EOF actions in the states that have transitions into a final state. */ +void FsmAp::middleEOFAction( int ordering, Action *action ) +{ + /* Set the actions in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* + * Set To State Actions. + */ + +/* Set to state actions in the start state. */ +void FsmAp::startToStateAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + startState->toStateActionTable.setAction( ordering, action ); + + afterOpMinimize( true ); +} + +/* Set to state actions in all states. */ +void FsmAp::allToStateAction( int ordering, Action *action ) +{ + /* Insert the action on all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->toStateActionTable.setAction( ordering, action ); +} + +/* Set to state actions in final states. */ +void FsmAp::finalToStateAction( int ordering, Action *action ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->toStateActionTable.setAction( ordering, action ); +} + +void FsmAp::notStartToStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +void FsmAp::notFinalToStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* Set to state actions in states that are not final and not the start state. */ +void FsmAp::middleToStateAction( int ordering, Action *action ) +{ + /* Set the action in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* + * Set From State Actions. + */ + +void FsmAp::startFromStateAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState( this ); + + startState->fromStateActionTable.setAction( ordering, action ); + + afterOpMinimize( true ); +} + +void FsmAp::allFromStateAction( int ordering, Action *action ) +{ + /* Insert the action on all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->fromStateActionTable.setAction( ordering, action ); +} + +void FsmAp::finalFromStateAction( int ordering, Action *action ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->fromStateActionTable.setAction( ordering, action ); +} + +void FsmAp::notStartFromStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +void FsmAp::notFinalFromStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +void FsmAp::middleFromStateAction( int ordering, Action *action ) +{ + /* Set the action in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +/* Shift the function ordering of the start transitions to start + * at fromOrder and increase in units of 1. Useful before staring. + * Returns the maximum number of order numbers used. */ +int FsmAp::shiftStartActionOrder( int fromOrder ) +{ + int maxUsed = 0; + + /* Walk the start state's transitions, shifting function ordering. */ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + int curFromOrder = fromOrder; + ActionTable::Iter action = trans->tdap()->actionTable; + for ( ; action.lte(); action++ ) + action->key = curFromOrder++; + + /* Keep track of the max number of orders used. */ + if ( curFromOrder - fromOrder > maxUsed ) + maxUsed = curFromOrder - fromOrder; + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + /* Walk the function data for the transition and set the keys to + * increasing values starting at fromOrder. 
*/ + int curFromOrder = fromOrder; + ActionTable::Iter action = cond->actionTable; + for ( ; action.lte(); action++ ) + action->key = curFromOrder++; + + /* Keep track of the max number of orders used. */ + if ( curFromOrder - fromOrder > maxUsed ) + maxUsed = curFromOrder - fromOrder; + } + } + } + + return maxUsed; +} + +/* Remove all priorities. */ +void FsmAp::clearAllPriorities() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Clear out priority data. */ + state->outPriorTable.empty(); + + /* Clear transition data from the out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) + trans->tdap()->priorTable.empty(); + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) + cond->priorTable.empty(); + } + } + + if ( state->nfaIn != 0 ) { + for ( NfaInList::Iter na = *state->nfaIn; na.lte(); na++ ) + na->priorTable.empty(); + } + } +} + +/* Zeros out the function ordering keys. This may be called before minimization + * when it is known that no more fsm operations are going to be done. This + * will achieve greater reduction as states will not be separated on the basis + * of function ordering. */ +void FsmAp::nullActionKeys( ) +{ + /* For each state... */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the transitions for the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + /* Walk the action table for the transition. */ + for ( ActionTable::Iter action = trans->tdap()->actionTable; + action.lte(); action++ ) + action->key = 0; + + /* Walk the action table for the transition. */ + for ( LmActionTable::Iter action = trans->tdap()->lmActionTable; + action.lte(); action++ ) + action->key = 0; + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + /* Walk the action table for the transition. 
*/ + for ( ActionTable::Iter action = cond->actionTable; + action.lte(); action++ ) + action->key = 0; + + /* Walk the action table for the transition. */ + for ( LmActionTable::Iter action = cond->lmActionTable; + action.lte(); action++ ) + action->key = 0; + } + } + } + + /* Null the action keys of the to state action table. */ + for ( ActionTable::Iter action = state->toStateActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the from state action table. */ + for ( ActionTable::Iter action = state->fromStateActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the out transtions. */ + for ( ActionTable::Iter action = state->outActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the error action table. */ + for ( ErrActionTable::Iter action = state->errActionTable; + action.lte(); action++ ) + action->ordering = 0; + + /* Null the action keys eof action table. */ + for ( ActionTable::Iter action = state->eofActionTable; + action.lte(); action++ ) + action->key = 0; + } +} + +/* Walk the list of states and verify that non final states do not have out + * data, that all stateBits are cleared, and that there are no states with + * zero foreign in transitions. */ +void FsmAp::verifyStates() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Non final states should not have leaving data. */ + if ( ! (state->stateBits & STB_ISFINAL) ) { + assert( state->outActionTable.length() == 0 ); + assert( state->outCondSpace == 0 ); + assert( state->outCondKeys.length() == 0 ); + assert( state->outPriorTable.length() == 0 ); + } + + /* Data used in algorithms should be cleared. */ + assert( (state->stateBits & STB_BOTH) == 0 ); + assert( state->foreignInTrans > 0 ); + } +} + +/* Compare two transitions according to their relative priority. Since the + * base transition has no priority associated with it, the default is to + * return equal. 
*/ +int FsmAp::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ) +{ + /* Looking for differing priorities on same keys. Need to concurrently + * scan the priority lists. */ + PriorTable::Iter pd1 = priorTable1; + PriorTable::Iter pd2 = priorTable2; + while ( pd1.lte() && pd2.lte() ) { + /* Check keys. */ + if ( pd1->desc->key < pd2->desc->key ) + pd1.increment(); + else if ( pd1->desc->key > pd2->desc->key ) + pd2.increment(); + /* Keys are the same, check priorities. */ + else if ( pd1->desc->priority < pd2->desc->priority ) { + if ( ctx->checkPriorInteraction && pd1->desc->guarded ) { + if ( ! priorInteraction ) { + priorInteraction = true; + guardId = pd1->desc->guardId; + } + } + return -1; + } + else if ( pd1->desc->priority > pd2->desc->priority ) { + if ( ctx->checkPriorInteraction && pd1->desc->guarded ) { + if ( ! priorInteraction ) { + priorInteraction = true; + guardId = pd1->desc->guardId; + } + } + return 1; + } + else { + /* Keys and priorities are equal, advance both. */ + pd1.increment(); + pd2.increment(); + } + } + + /* No differing priorities on the same key. 
*/ + return 0; +} + +int FsmAp::compareCondListBitElim( const CondList &condList1, const CondList &condList2 ) +{ + typedef ValPairIter< PiList<CondAp> > ValPairIterPiListCondAp; + ValPairIterPiListCondAp outPair( condList1, condList2 ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + case ValPairIterPiListCondAp::RangeInS1: { + int compareRes = FsmAp::compareCondBitElimPtr<CondAp>( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + } + case ValPairIterPiListCondAp::RangeInS2: { + int compareRes = FsmAp::compareCondBitElimPtr<CondAp>( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + } + case ValPairIterPiListCondAp::RangeOverlap: { + int compareRes = FsmAp::compareCondBitElimPtr<CondAp>( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + }} + } + return 0; +} + +/* Compares two transitions according to priority and functions. Pointers + * should not be null. Does not consider to state or from state. Compare two + * transitions according to the data contained in the transitions. Data means + * any properties added to user transitions that may differentiate them. Since + * the base transition has no data, the default is to return equal. 
*/ +int FsmAp::compareTransData( TransAp *trans1, TransAp *trans2 ) +{ + if ( trans1->condSpace < trans2->condSpace ) + return -1; + else if ( trans2->condSpace < trans1->condSpace ) + return 1; + + if ( trans1->plain() ) { + int compareRes = FsmAp::compareCondDataPtr( trans1->tdap(), trans2->tdap() ); + if ( compareRes != 0 ) + return compareRes; + } + else { + typedef ValPairIter< PiList<CondAp> > ValPairIterPiListCondAp; + ValPairIterPiListCondAp outPair( trans1->tcap()->condList, + trans2->tcap()->condList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + case ValPairIterPiListCondAp::RangeInS1: { + int compareRes = FsmAp::compareCondDataPtr<CondAp>( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + } + case ValPairIterPiListCondAp::RangeInS2: { + int compareRes = FsmAp::compareCondDataPtr<CondAp>( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + } + case ValPairIterPiListCondAp::RangeOverlap: { + int compareRes = FsmAp::compareCondDataPtr<CondAp>( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + }} + } + } + return 0; +} + +/* Compares two transitions according to priority and functions. Pointers + * should not be null. Does not consider to state or from state. Compare two + * transitions according to the data contained in the transitions. Data means + * any properties added to user transitions that may differentiate them. Since + * the base transition has no data, the default is to return equal. */ +template< class Trans > int FsmAp::compareCondData( Trans *trans1, Trans *trans2 ) +{ + /* Compare the prior table. */ + int cmpRes = CmpPriorTable::compare( trans1->priorTable, + trans2->priorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare longest match action tables. 
*/ + cmpRes = CmpLmActionTable::compare(trans1->lmActionTable, + trans2->lmActionTable); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare action tables. */ + return CmpActionTable::compare(trans1->actionTable, + trans2->actionTable); +} + +/* Compares two transitions according to priority and functions. Pointers + * should not be null. Does not consider to state or from state. Compare two + * transitions according to the data contained in the transitions. Data means + * any properties added to user transitions that may differentiate them. Since + * the base transition has no data, the default is to return equal. */ +template< class Trans > int FsmAp::compareCondBitElim( Trans *trans1, Trans *trans2 ) +{ + if ( trans1->toState < trans2->toState ) + return -1; + else if ( trans1->toState > trans2->toState ) + return 1; + + /* Compare the prior table. */ + int cmpRes = CmpPriorTable::compare( trans1->priorTable, + trans2->priorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare longest match action tables. */ + cmpRes = CmpLmActionTable::compare(trans1->lmActionTable, + trans2->lmActionTable); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare action tables. */ + return CmpActionTable::compare(trans1->actionTable, + trans2->actionTable); +} + +/* Compare the properties of states that are embedded by users. Compares out + * priorities, out transitions, to, from, out, error and eof action tables. */ +int FsmAp::compareStateData( const StateAp *state1, const StateAp *state2 ) +{ + /* Compare the out priority table. */ + int cmpRes = CmpPriorTable:: + compare( state1->outPriorTable, state2->outPriorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test to state action tables. */ + cmpRes = CmpActionTable::compare( state1->toStateActionTable, + state2->toStateActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test from state action tables. 
*/ + cmpRes = CmpActionTable::compare( state1->fromStateActionTable, + state2->fromStateActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out action tables. */ + cmpRes = CmpActionTable::compare( state1->outActionTable, + state2->outActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Out condition space and set of vals. */ + if ( state1->outCondSpace < state2->outCondSpace ) + return -1; + else if ( state1->outCondSpace > state2->outCondSpace ) + return 1; + + cmpRes = CmpTable<int>::compare( state1->outCondKeys, + state2->outCondKeys ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out error action tables. */ + cmpRes = CmpErrActionTable::compare( state1->errActionTable, + state2->errActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test eof action tables. */ + cmpRes = CmpActionTable::compare( state1->eofActionTable, + state2->eofActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + return CmpTable<LongestMatchPart*>::compare( + state1->lmNfaParts, state2->lmNfaParts ); +} + + +/* Invoked when a state looses its final state status and the leaving + * transition embedding data should be deleted. */ +void FsmAp::clearOutData( StateAp *state ) +{ + /* Kill the out actions and priorities. */ + state->outCondSpace = 0; + state->outCondKeys.empty(); + state->outActionTable.empty(); + state->outPriorTable.empty(); +} + +bool FsmAp::hasOutData( StateAp *state ) +{ + return ( state->outActionTable.length() > 0 || + state->outCondSpace != 0 || + state->outCondKeys.length() > 0 || + state->outPriorTable.length() > 0 || + state->outCondSpace != 0 ); +} + +/* + * Setting Conditions. + */ + +FsmRes FsmAp::startFsmCondition( Action *condAction, bool sense ) +{ + CondSet set; + CondKeySet vals; + set.insert( condAction ); + vals.append( sense ? 1 : 0 ); + + /* Make sure the start state has no other entry points. 
*/ + isolateStartState( this ); + + FsmRes res = embedCondition( this, startState, set, vals ); + if ( !res.success() ) + return res; + + if ( startState->nfaOut != 0 ) { + /* Only one level: also embed the condition in the target state of + * each NFA transition leaving the start state. Passing startState + * here would just repeat the call above and leave 'na' unused. */ + for ( NfaTransList::Iter na = *startState->nfaOut; na.lte(); na++ ) { + res = embedCondition( this, na->toState, set, vals ); + if ( !res.success() ) + return res; + } + } + + afterOpMinimize( true ); + + return FsmRes( FsmRes::Fsm(), this ); +} + +void FsmAp::allTransCondition( Action *condAction, bool sense ) +{ + CondSet set; + CondKeySet vals; + set.insert( condAction ); + vals.append( sense ? 1 : 0 ); + + /* NOTE(review): the FsmRes returned by embedCondition is discarded here, + * unlike in startFsmCondition -- confirm a failure can be ignored. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + embedCondition( this, state, set, vals ); +} + +void FsmAp::leaveFsmCondition( Action *condAction, bool sense ) +{ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + addOutCondition( *state, condAction, sense ); +} diff --git a/libfsm/fsmattach.cc b/libfsm/fsmattach.cc new file mode 100644 index 00000000..5e7e5e7c --- /dev/null +++ b/libfsm/fsmattach.cc @@ -0,0 +1,857 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <string.h> +#include <assert.h> +#include "fsmgraph.h" + +#include <iostream> +using namespace std; + +/* Record 'from' in the stateDictIn set of 'to', allocating the set on first + * use. A reference from a different state counts as a foreign in transition. */ +void FsmAp::attachStateDict( StateAp *from, StateAp *to ) +{ + if ( to->stateDictIn == 0 ) + to->stateDictIn = new StateSet; + + bool inserted = to->stateDictIn->insert( from ); + assert( inserted ); + + if ( from != to ) { + if ( misfitAccounting ) { + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + to->foreignInTrans += 1; + } +} + +/* Undo attachStateDict: remove 'from' from the stateDictIn set of 'to' and + * give back the foreign in transition count that was taken for it. */ +void FsmAp::detachStateDict( StateAp *from, StateAp *to ) +{ + bool removed = to->stateDictIn->remove( from ); + assert( removed ); + + /* Mirror attachStateDict: only a reference from a different state was + * counted as foreign, so only that case is uncounted. */ + if ( from != to ) { + to->foreignInTrans -= 1; + + if ( misfitAccounting ) { + if ( to->foreignInTrans == 0 ) + misfitList.append( stateList.detach( to ) ); + } + } +} + +void FsmAp::attachToNfa( StateAp *from, StateAp *to, NfaTrans *nfaTrans ) +{ + if ( to->nfaIn == 0 ) + to->nfaIn = new NfaInList; + + nfaTrans->fromState = from; + nfaTrans->toState = to; + + attachToInList( from, to, to->nfaIn->head, nfaTrans ); +} + +void FsmAp::detachFromNfa( StateAp *from, StateAp *to, NfaTrans *nfaTrans ) +{ + nfaTrans->fromState = 0; + nfaTrans->toState = 0; + + detachFromInList( from, to, to->nfaIn->head, nfaTrans ); +} + +template< class Head > void FsmAp::attachToInList( StateAp *from, + StateAp *to, Head *&head, Head *trans ) +{ + trans->ilnext = head; + trans->ilprev = 0; + + /* If in trans list is not empty, set the head->prev to trans. */ + if ( head != 0 ) + head->ilprev = trans; + + /* Now insert ourselves at the front of the list. */ + head = trans; + + /* Keep track of foreign transitions for from and to. 
*/ + if ( from != to ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * move it from the misfit list to the main list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + to->foreignInTrans += 1; + } +} + +/* Detach a transition from an inlist. The head of the inlist must be supplied. */ +template< class Head > void FsmAp::detachFromInList( StateAp *from, StateAp *to, + Head *&head, Head *trans ) +{ + if ( trans->ilprev == 0 ) + head = trans->ilnext; + else + trans->ilprev->ilnext = trans->ilnext; + + if ( trans->ilnext != 0 ) + trans->ilnext->ilprev = trans->ilprev; + + /* Keep track of foreign transitions for from and to. */ + if ( from != to ) { + to->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions goes down to 0 then move it + * from the main list to the misfit list. */ + if ( to->foreignInTrans == 0 ) + misfitList.append( stateList.detach( to ) ); + } + } +} + +/* Allocate a condition sub-transition keyed on onChar, append it to the cond + * list of trans and, when 'to' is non-null, link it into to's inCond list. */ +CondAp *FsmAp::attachNewCond( TransAp *trans, StateAp *from, StateAp *to, CondKey onChar ) +{ + /* Sub-transition for conditions. */ + CondAp *condAp = new CondAp( trans ); + condAp->key = onChar; + trans->tcap()->condList.append( condAp ); + + condAp->fromState = from; + condAp->toState = to; + + /* Attach in list. */ + if ( to != 0 ) + attachToInList( from, to, to->inCond.head, condAp ); + + return condAp; +} + +TransAp *FsmAp::attachNewTrans( StateAp *from, StateAp *to, Key lowKey, Key highKey ) +{ + /* Make the new transition. */ + TransDataAp *retVal = new TransDataAp(); + + /* Make the entry in the out list for the transitions. */ + from->outList.append( retVal ); + + /* Set the keys of the new trans. */ + retVal->lowKey = lowKey; + retVal->highKey = highKey; + + retVal->fromState = from; + retVal->toState = to; + + /* Attach in list. 
*/ + if ( to != 0 ) + attachToInList( from, to, to->inTrans.head, retVal ); + + return retVal; +} + +/* Attach for range lists or for the default transition. This attach should + * be used when a transition already is allocated and must be attached to a + * target state. Does not handle adding the transition into the out list. */ +void FsmAp::attachTrans( StateAp *from, StateAp *to, TransDataAp *trans ) +{ + assert( trans->fromState == 0 && trans->toState == 0 ); + + trans->fromState = from; + trans->toState = to; + + if ( to != 0 ) { + /* Link the transition into the target state's inTrans list. */ + attachToInList( from, to, to->inTrans.head, trans ); + } +} + +void FsmAp::attachTrans( StateAp *from, StateAp *to, CondAp *trans ) +{ + assert( trans->fromState == 0 && trans->toState == 0 ); + + trans->fromState = from; + trans->toState = to; + + if ( to != 0 ) { + /* Link the condition into the target state's inCond list. */ + attachToInList( from, to, to->inCond.head, trans ); + } +} + +/* Redirect a transition away from error and towards some state. This is just + * like attachTrans except it requires fromState to be set and does not touch + * it. */ +void FsmAp::redirectErrorTrans( StateAp *from, StateAp *to, TransDataAp *trans ) +{ + assert( trans->fromState != 0 && trans->toState == 0 ); + trans->toState = to; + + if ( to != 0 ) { + /* Attach using the inList pointer as the head pointer. */ + attachToInList( from, to, to->inTrans.head, trans ); + } +} + +void FsmAp::redirectErrorTrans( StateAp *from, StateAp *to, CondAp *trans ) +{ + assert( trans->fromState != 0 && trans->toState == 0 ); + trans->toState = to; + + if ( to != 0 ) { + /* Attach using the inList pointer as the head pointer. */ + attachToInList( from, to, to->inCond.head, trans ); + } +} + +/* Detach for out/in lists or for default transition. 
*/ +void FsmAp::detachTrans( StateAp *from, StateAp *to, TransDataAp *trans ) +{ + /* The caller must pass the states the transition is currently attached to. */ + assert( trans->fromState == from && trans->toState == to ); + + trans->fromState = 0; + trans->toState = 0; + + if ( to != 0 ) { + detachFromInList( from, to, to->inTrans.head, trans ); + } +} + +/* Detach a condition sub-transition from its source and target states. The + * cond is not removed from any cond list and is not deleted. */ +void FsmAp::detachTrans( StateAp *from, StateAp *to, CondAp *trans ) +{ + /* The caller must pass the states the cond is currently attached to. */ + assert( trans->fromState == from && trans->toState == to ); + + trans->fromState = 0; + trans->toState = 0; + + if ( to != 0 ) { + detachFromInList( from, to, to->inCond.head, trans ); + } +} + + +/* Detach a state from the graph. Detaches and deletes transitions in and out + * of the state. Empties inList and outList. Removes the state from the final + * state set. A detached state becomes useless and should be deleted. */ +void FsmAp::detachState( StateAp *state ) +{ + /* Detach and delete the plain transitions that lead into the state. Each + * pass removes the current head of the in list. */ + while ( state->inTrans.head != 0 ) { + /* Get pointers to the trans and the state. */ + TransDataAp *trans = state->inTrans.head; + + StateAp *fromState = trans->fromState; + + /* Detach the transitions from the source state. */ + detachTrans( fromState, state, trans ); + fromState->outList.detach( trans ); + delete trans->tdap(); + } + + /* Detach the in transitions from the inList list of transitions. */ + while ( state->inCond.head != 0 ) { + /* Get pointers to the trans and the state. */ + CondAp *condAp = state->inCond.head; + TransAp *trans = condAp->transAp; + + StateAp *fromState = condAp->fromState; + + /* Detach the transitions from the source state. */ + detachTrans( fromState, state, condAp ); + + trans->tcap()->condList.detach( condAp ); + delete condAp; + + /* The parent transition goes away once its last cond is removed. */ + if ( trans->tcap()->condList.length() == 0 ) { + /* Ok to delete the transition. */ + fromState->outList.detach( trans ); + delete trans->tcap(); + } + } + + /* Remove the entry points in on the machine. */ + while ( state->entryIds.length() > 0 ) + unsetEntry( state->entryIds[0], state ); + + /* Detach out range transitions. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); ) { + /* Fetch the next element first, the current one is deleted below. */ + TransList::Iter next = trans.next(); + if ( trans->plain() ) { + detachTrans( state, trans->tdap()->toState, trans->tdap() ); + delete trans->tdap(); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); ) { + CondList::Iter next = cond.next(); + detachTrans( state, cond->toState, cond ); + delete cond; + cond = next; + } + trans->tcap()->condList.abandon(); + delete trans->tcap(); + } + trans = next; + } + + /* Delete all of the out range pointers. */ + state->outList.abandon(); + + /* Unset final stateness before detaching from graph. */ + if ( state->stateBits & STB_ISFINAL ) + finStateSet.remove( state ); + + /* Detach and delete the nfa transitions coming into the state, then free + * the in list itself. */ + if ( state->nfaIn != 0 ) { + while ( state->nfaIn->head != 0 ) { + NfaTrans *trans = state->nfaIn->head; + StateAp *fromState = trans->fromState; + + detachFromNfa( fromState, state, trans ); + fromState->nfaOut->detach( trans ); + delete trans; + } + delete state->nfaIn; + state->nfaIn = 0; + } + + /* Detach and delete the nfa transitions leaving the state, then free the + * out list itself. */ + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter t = *state->nfaOut; t.lte(); ) { + NfaTransList::Iter next = t.next(); + detachFromNfa( t->fromState, t->toState, t ); + state->nfaOut->detach( t ); + delete t; + t = next; + } + state->nfaOut->abandon(); + delete state->nfaOut; + state->nfaOut = 0; + } + + /* Remove this state from the state set of every state listed in + * stateDictIn. */ + if ( state->stateDictIn != 0 ) { + for ( StateSet::Iter s = *state->stateDictIn; s.lte(); s++ ) { + bool removed = (*s)->stateDictEl->stateSet.remove( state ); + assert( removed ); + } + + delete state->stateDictIn; + state->stateDictIn = 0; + } + + /* Call detachStateDict for each state in this state's dict element, then + * take the element out of stateDict and free it. The state also comes off + * the nfa list. */ + if ( state->stateDictEl != 0 ) { + for ( StateSet::Iter s = state->stateDictEl->stateSet; s.lte(); s++ ) + detachStateDict( state, *s ); + + stateDict.detach( state->stateDictEl ); + delete state->stateDictEl; + state->stateDictEl = 0; + + nfaList.detach( state ); + } +} + +/* Make a new plain transition from the given state to the same target as + * srcTrans and add the data of srcTrans into it with addInTrans. The keys of + * the new transition are not set here. */ +TransDataAp *FsmAp::dupTransData( StateAp *from, TransDataAp *srcTrans ) +{ + /* Make a new transition. 
*/ + TransDataAp *newTrans = new TransDataAp(); + newTrans->condSpace = srcTrans->condSpace; + + attachTrans( from, srcTrans->tdap()->toState, newTrans ); + addInTrans( newTrans, srcTrans->tdap() ); + + return newTrans; +} + + +/* Duplicate a transition. Makes a new transition that is attached to the same + * dest as srcTrans. The new transition has functions and priority taken from + * srcTrans. Used for merging a transition in to a free spot. The trans can + * just be dropped in. It does not conflict with an existing trans and need + * not be crossed. Returns the new transition. The keys of the new transition + * and its place in an out list are left to the caller. */ +TransAp *FsmAp::dupTrans( StateAp *from, TransAp *srcTrans ) +{ + if ( srcTrans->plain() ) { + /* Make a new transition. */ + TransDataAp *newTrans = new TransDataAp(); + newTrans->condSpace = srcTrans->condSpace; + + attachTrans( from, srcTrans->tdap()->toState, newTrans ); + addInTrans( newTrans, srcTrans->tdap() ); + + return newTrans; + } + else { + /* Make a new condition-based transition. */ + TransAp *newTrans = new TransCondAp(); + newTrans->condSpace = srcTrans->condSpace; + + /* Duplicate each cond of the source, keeping its key. */ + for ( CondList::Iter sc = srcTrans->tcap()->condList; sc.lte(); sc++ ) { + /* Sub-transition for conditions. */ + CondAp *newCond = new CondAp( newTrans ); + newCond->key = sc->key; + newTrans->tcap()->condList.append( newCond ); + + /* We can attach the transition, one does not exist. */ + attachTrans( from, sc->toState, newCond ); + + /* Call the user callback to add in the original source transition. */ + addInTrans( newCond, sc.ptr ); + } + + return newTrans; + } +} + +/* Duplicate a transition. Makes a new transition that is attached to the same + * dest as srcTrans. The new transition has functions and priority taken from + * srcTrans. Used for merging a transition in to a free spot. The trans can + * just be dropped in. It does not conflict with an existing trans and need + * not be crossed. Returns the new transition. 
*/ +CondAp *FsmAp::dupCondTrans( StateAp *from, TransAp *destParent, CondAp *srcTrans ) +{ + /* Sub-transition for conditions. The key is not set here and the cond is + * not appended to the cond list of destParent. */ + CondAp *newCond = new CondAp( destParent ); + + /* We can attach the transition, one does not exist. */ + attachTrans( from, srcTrans->toState, newCond ); + + /* Call the user callback to add in the original source transition. */ + addInTrans( newCond, srcTrans ); + + return newCond; +} + +/* In crossing, src trans and dest trans both go to existing states. Make one + * state from the sets of states that src and dest trans go to. Both + * transitions must have a non-null target. Returns destTrans. */ +template< class Trans > Trans *FsmAp::fsmAttachStates( StateAp *from, + Trans *destTrans, Trans *srcTrans ) +{ + /* The priorities are equal. We must merge the transitions. Does the + * existing trans go to the state we are to attach to? ie, are we to + * simply double up the transition? */ + StateAp *toState = srcTrans->toState; + StateAp *existingState = destTrans->toState; + + if ( existingState == toState ) { + /* The transition is a double up to the same state. Copy the src + * trans into itself. We don't need to merge in the from out trans + * data, that was done already. */ + addInTrans( destTrans, srcTrans ); + } + else { + /* The trans is not a double up. Dest trans cannot be the same as src + * trans. Set up the state set. */ + StateSet stateSet; + + /* We go to all the states the existing trans goes to, plus... */ + if ( existingState->stateDictEl == 0 ) + stateSet.insert( existingState ); + else + stateSet.insert( existingState->stateDictEl->stateSet ); + + /* ... all the states that we have been told to go to. */ + if ( toState->stateDictEl == 0 ) + stateSet.insert( toState ); + else + stateSet.insert( toState->stateDictEl->stateSet ); + + /* Look for the state. If it is not there already, make it. */ + StateDictEl *lastFound; + if ( stateDict.insert( stateSet, &lastFound ) ) { + /* Make a new state representing the combination of states in + * stateSet. It gets added to the fill list. 
This means that we + * need to fill in its transitions sometime in the future. We + * don't do that now (ie, do not recurse). */ + StateAp *combinState = addState(); + + /* Link up the dict element and the state. */ + lastFound->targState = combinState; + combinState->stateDictEl = lastFound; + + /* Setup the in links. */ + for ( StateSet::Iter s = stateSet; s.lte(); s++ ) + attachStateDict( combinState, *s ); + + /* Add to the fill list. */ + nfaList.append( combinState ); + } + + /* Get the state that was inserted or found. */ + StateAp *targ = lastFound->targState; + + /* Detach the state from existing state. */ + detachTrans( from, existingState, destTrans ); + + /* Re-attach to the new target. */ + attachTrans( from, targ, destTrans ); + + /* Add in src trans to the existing transition that we redirected to + * the new state. We don't need to merge in the from out trans data, + * that was done already. */ + addInTrans( destTrans, srcTrans ); + } + + return destTrans; +} + +/* Two transitions are to be crossed, handle the possibility of either going + * to the error state. A null toState is treated as the error state. Always + * returns destTrans or the result of fsmAttachStates. */ +template < class Trans > Trans *FsmAp::mergeTrans( StateAp *from, + Trans *destTrans, Trans *srcTrans ) +{ + Trans *retTrans = 0; + if ( destTrans->toState == 0 && srcTrans->toState == 0 ) { + /* Error added into error. */ + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else if ( destTrans->toState == 0 && srcTrans->toState != 0 ) { + /* Non error added into error. We need to detach and reattach. */ + detachTrans( from, destTrans->toState, destTrans ); + attachTrans( from, srcTrans->toState, destTrans ); + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else if ( srcTrans->toState == 0 ) { + /* Dest goes somewhere but src doesn't, just add it in. */ + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else { + /* Both go somewhere, run the actual cross. 
*/ + retTrans = fsmAttachStates( from, destTrans, srcTrans ); + } + + return retTrans; +} + +/* Find the trans with the higher priority. If src is lower priority than dest then + * src is ignored. If src is higher priority than dest, then src overwrites dest. If + * the priorities are equal, then they are merged. */ +CondAp *FsmAp::crossCondTransitions( StateAp *from, TransAp *destParent, + CondAp *destTrans, CondAp *srcTrans ) +{ + CondAp *retTrans; + + /* Compare the priority of the dest and src transitions. */ + int compareRes = comparePrior( destTrans->priorTable, srcTrans->priorTable ); + if ( compareRes < 0 ) { + /* Src trans has a higher priority than dest, src overwrites dest. + * Detach dest and return a copy of src. */ + detachTrans( from, destTrans->toState, destTrans ); + delete destTrans; + retTrans = dupCondTrans( from, destParent, srcTrans ); + } + else if ( compareRes > 0 ) { + /* The dest trans has a higher priority, use dest. */ + retTrans = destTrans; + } + else { + /* Src trans and dest trans have the same priority, they must be merged. */ + retTrans = mergeTrans( from, destTrans, srcTrans ); + } + + /* Return the transition that resulted from the cross. */ + return retTrans; +} + +/* Build a stand-alone conds copy of srcTrans for cond space expansion. A plain + * source becomes a single cond with key 0. The conds of the copy are attached + * using the from state recorded on the source, and the copy is not added to + * any out list here. */ +TransAp *FsmAp::copyTransForExpansion( StateAp *from, TransAp *srcTrans ) +{ + /* This is the dup without the attach. */ + TransCondAp *newTrans = new TransCondAp(); + newTrans->condSpace = srcTrans->condSpace; + + if ( srcTrans->plain() ) { + TransDataAp *srcData = srcTrans->tdap(); + CondAp *newCond = new CondAp( newTrans ); + newCond->key = 0; + + attachTrans( srcData->fromState, srcData->toState, newCond ); + + /* Call the user callback to add in the original source transition. */ + //addInTrans( newCond, srcData ); + + /* Not a copy of ourself, get the functions and priorities. 
*/ + newCond->lmActionTable.setActions( srcData->lmActionTable ); + newCond->actionTable.setActions( srcData->actionTable ); + newCond->priorTable.setPriors( srcData->priorTable ); + + newTrans->condList.append( newCond ); + } + else { + for ( CondList::Iter sc = srcTrans->tcap()->condList; sc.lte(); sc++ ) { + /* Sub-transition for conditions. */ + CondAp *newCond = new CondAp( newTrans ); + newCond->key = sc->key; + + attachTrans( sc->fromState, sc->toState, newCond ); + + /* Call the user callback to add in the original source transition. */ + addInTrans( newCond, sc.ptr ); + + newTrans->condList.append( newCond ); + } + } + + /* Copy the key range from the source transition. */ + newTrans->lowKey = srcTrans->lowKey; + newTrans->highKey = srcTrans->highKey; + + return newTrans; +} + +/* Detach and delete every cond of trans, then delete trans itself. The + * transition must be a conds transition. It is not removed from any out + * list. */ +void FsmAp::freeEffectiveTrans( TransAp *trans ) +{ + for ( CondList::Iter sc = trans->tcap()->condList; sc.lte(); ) { + /* Fetch the next element first, the current one is deleted below. */ + CondList::Iter next = sc.next(); + detachTrans( sc->fromState, sc->toState, sc ); + delete sc; + sc = next; + } + trans->tcap()->condList.abandon(); + delete trans->tcap(); +} + +/* Cross two plain transitions. The one with the higher priority wins. If the + * priorities are equal the two are merged. Returns the resulting transition, + * which is a fresh copy of src when src overwrites dest. */ +TransDataAp *FsmAp::crossTransitionsBothPlain( StateAp *from, + TransDataAp *destTrans, TransDataAp *srcTrans ) +{ + /* Neither have cond space and no expansion took place. Cross them. */ + TransDataAp *retTrans; + + /* Compare the priority of the dest and src transitions. */ + int compareRes = comparePrior( destTrans->priorTable, srcTrans->priorTable ); + if ( compareRes < 0 ) { + /* Src trans has a higher priority than dest, src overwrites dest. + * Detach dest and return a copy of src. */ + detachTrans( from, destTrans->toState, destTrans ); + delete destTrans; + retTrans = dupTransData( from, srcTrans ); + } + else if ( compareRes > 0 ) { + /* The dest trans has a higher priority, use dest. */ + retTrans = destTrans; + } + else { + /* Src trans and dest trans have the same priority, they must be merged. 
*/ + retTrans = mergeTrans( from, destTrans, srcTrans ); + } + + /* Return the transition that resulted from the cross. */ + return retTrans; +} + +/* Find the trans with the higher priority. If src is lower priority than dest then + * src is ignored. If src is higher priority than dest, then src overwrites dest. If + * the priorities are equal, then they are merged. */ +TransAp *FsmAp::crossTransitions( StateAp *from, + TransAp *destTrans, TransAp *srcTrans ) +{ + if ( destTrans->plain() && srcTrans->plain() ) { + /* Return the transition that resulted from the cross. */ + return crossTransitionsBothPlain( from, + destTrans->tdap(), srcTrans->tdap() ); + } + else { + /* At least one is non-empty. Target is non-empty. Need to work in + * condition space. */ + CondSpace *mergedSpace = expandCondSpace( destTrans, srcTrans ); + + /* If the dest state cond space does not equal the merged, we have to + * rewrite it. If the src state cond space does not equal, we have to + * copy it. */ + + TransAp *effSrcTrans = srcTrans; + + /* The source is never modified. Expansion is done on a copy. */ + if ( srcTrans->condSpace != mergedSpace ) { + effSrcTrans = copyTransForExpansion( from, srcTrans ); + CondSpace *orig = effSrcTrans->condSpace; + effSrcTrans->condSpace = mergedSpace; + expandConds( from, effSrcTrans, orig, mergedSpace ); + } + + if ( destTrans->condSpace != mergedSpace ) { + /* Make the transition into a conds transition. If dest is a plain + * transition, we have to replace it with a conds transition. */ + if ( destTrans->plain() ) + destTrans = convertToCondAp( from, destTrans->tdap() ); + + /* Now expand the dest. */ + CondSpace *orig = destTrans->condSpace; + destTrans->condSpace = mergedSpace; + expandConds( from, destTrans, orig, mergedSpace ); + } + + /* The destination list. */ + CondList destList; + + /* Set up an iterator to stop at breaks. 
*/ + typedef ValPairIter< PiList<CondAp> > ValPairIterPiListCondAp; + ValPairIterPiListCondAp outPair( destTrans->tcap()->condList, + effSrcTrans->tcap()->condList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + case ValPairIterPiListCondAp::RangeInS1: { + /* The pair iter is the authority on the keys. It may have needed + * to break the dest range. */ + CondAp *destCond = outPair.s1Tel.trans; + destCond->key = outPair.s1Tel.key; + destList.append( destCond ); + break; + } + case ValPairIterPiListCondAp::RangeInS2: { + /* Src range may get crossed with dest's default transition. */ + CondAp *newCond = dupCondTrans( from, destTrans, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newCond->key = outPair.s2Tel.key; + destList.append( newCond ); + break; + } + case ValPairIterPiListCondAp::RangeOverlap: { + /* Exact overlap, cross them. */ + CondAp *newTrans = crossCondTransitions( from, destTrans, + outPair.s1Tel.trans, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newTrans->key = outPair.s1Tel.key; + destList.append( newTrans ); + break; + }} + } + + /* Abandon the old outList and transfer destList into it. */ + destTrans->tcap()->condList.transfer( destList ); + + /* Free the expanded copy of the source, if one was made. */ + if ( srcTrans != effSrcTrans ) + freeEffectiveTrans( effSrcTrans ); + + return destTrans; + } +} + +/* Copy the transitions in srcList to the outlist of dest. The srcList should + * not be the outList of dest, otherwise you would be copying the contents of + * srcList into itself as it's iterated: bad news. */ +void FsmAp::outTransCopy( StateAp *dest, TransAp *srcList ) +{ + /* The destination list. */ + TransList destList; + + /* Set up an iterator to stop at breaks. 
*/ + typedef RangePairIter< PiList<TransAp> > RangePairIterPiListTransAp; + RangePairIterPiListTransAp outPair( ctx, dest->outList, srcList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + case RangePairIterPiListTransAp::RangeInS1: { + /* The pair iter is the authority on the keys. It may have needed + * to break the dest range. */ + TransAp *destTrans = outPair.s1Tel.trans; + destTrans->lowKey = outPair.s1Tel.lowKey; + destTrans->highKey = outPair.s1Tel.highKey; + destList.append( destTrans ); + break; + } + case RangePairIterPiListTransAp::RangeInS2: { + /* Src range may get crossed with dest's default transition. */ + TransAp *newTrans = dupTrans( dest, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newTrans->lowKey = outPair.s2Tel.lowKey; + newTrans->highKey = outPair.s2Tel.highKey; + destList.append( newTrans ); + break; + } + case RangePairIterPiListTransAp::RangeOverlap: { + /* Exact overlap, cross them. */ + TransAp *newTrans = crossTransitions( dest, + outPair.s1Tel.trans, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newTrans->lowKey = outPair.s1Tel.lowKey; + newTrans->highKey = outPair.s1Tel.highKey; + destList.append( newTrans ); + break; + } + case RangePairIterPiListTransAp::BreakS1: { + /* Since we are always writing to the dest trans, the dest needs + * to be copied when it is broken. The copy goes into the first + * half of the break to "break it off". */ + outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans ); + break; + } + case RangePairIterPiListTransAp::BreakS2: + /* Nothing is done when the src range is broken. */ + break; + } + } + + /* Abandon the old outList and transfer destList into it. */ + dest->outList.transfer( destList ); +} + +/* Move all the transitions that go into src so that they go into dest. The + * start state status, the entry points and the inward nfa links of src are + * moved to dest as well. */ +void FsmAp::moveInwardTrans( StateAp *dest, StateAp *src ) +{ + /* Do not try to move in trans to and from the same state. 
*/ + assert( dest != src ); + + /* If src is the start state, dest becomes the start state. */ + if ( src == startState ) { + unsetStartState(); + setStartState( dest ); + } + + /* For each entry point into src, create an entry point into dest, when the + * state is detached, the entry points to src will be removed. */ + for ( EntryIdSet::Iter enId = src->entryIds; enId.lte(); enId++ ) + changeEntry( *enId, dest, src ); + + /* Move the plain transitions in the inTrans list. Each pass takes the + * current head off the list of src. */ + while ( src->inTrans.head != 0 ) { + /* Get trans and from state. */ + TransDataAp *trans = src->inTrans.head; + StateAp *fromState = trans->fromState; + + /* Detach from src, reattach to dest. */ + detachTrans( fromState, src, trans ); + attachTrans( fromState, dest, trans ); + } + + /* Move the cond transitions in the inCond list. */ + while ( src->inCond.head != 0 ) { + /* Get trans and from state. */ + CondAp *trans = src->inCond.head; + StateAp *fromState = trans->fromState; + + /* Detach from src, reattach to dest. */ + detachTrans( fromState, src, trans ); + attachTrans( fromState, dest, trans ); + } + + /* Move inward nfa links. 
*/ + if ( src->nfaIn != 0 ) { + while ( src->nfaIn->head != 0 ) { + NfaTrans *trans = src->nfaIn->head; + StateAp *fromState = trans->fromState; + + /* Detach from src, reattach to dest. */ + detachFromNfa( fromState, src, trans ); + attachToNfa( fromState, dest, trans ); + } + } +} diff --git a/libfsm/fsmbase.cc b/libfsm/fsmbase.cc new file mode 100644 index 00000000..bdf40279 --- /dev/null +++ b/libfsm/fsmbase.cc @@ -0,0 +1,853 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "fsmgraph.h" +#include "parsedata.h" + +#include <string.h> +#include <assert.h> +#include <iostream> + +/* Set up a context from the global settings in fsmGbl. Counters start at zero + * and all expression pointers start out null. */ +FsmCtx::FsmCtx( FsmGbl *fsmGbl ) +: + minimizeLevel(fsmGbl->minimizeLevel), + minimizeOpt(fsmGbl->minimizeOpt), + + /* No limit. 
*/ + stateLimit(STATE_UNLIMITED), + + printStatistics(fsmGbl->printStatistics), + + checkPriorInteraction(fsmGbl->checkPriorInteraction), + + unionOp(false), + + condsCheckDepth(0), + + curActionOrd(0), + curPriorOrd(0), + + nextPriorKey(0), + nextCondId(0), + + fsmGbl(fsmGbl), + generatingSectionSubset(false), + lmRequiresErrorState(false), + nameIndex(0), + + getKeyExpr(0), + accessExpr(0), + prePushExpr(0), + postPopExpr(0), + nfaPrePushExpr(0), + nfaPostPopExpr(0), + pExpr(0), + peExpr(0), + eofExpr(0), + csExpr(0), + topExpr(0), + stackExpr(0), + actExpr(0), + tokstartExpr(0), + tokendExpr(0), + dataExpr(0) +{ + keyOps = new KeyOps; + condData = new CondData; +} + +/* Free the key ops and cond data allocated by the constructor, empty the + * prior desc and action lists, and delete whichever expressions were set. */ +FsmCtx::~FsmCtx() +{ + delete keyOps; + delete condData; + priorDescList.empty(); + + actionList.empty(); + + /* Deleting a null pointer has no effect, so expressions that were never + * set need no check. */ + delete getKeyExpr; + delete accessExpr; + delete prePushExpr; + delete postPopExpr; + delete nfaPrePushExpr; + delete nfaPostPopExpr; + delete pExpr; + delete peExpr; + delete eofExpr; + delete csExpr; + delete topExpr; + delete stackExpr; + delete actExpr; + delete tokstartExpr; + delete tokendExpr; + delete dataExpr; +} + +/* Graph constructor. Makes an empty graph with no start state. */ +FsmAp::FsmAp( FsmCtx *ctx ) +: + ctx( ctx ), + + priorInteraction(false), + + /* No start state. */ + startState(0), + errState(0), + + /* Misfit accounting is a switch, turned on only at specific times. It + * controls what happens when states have no way in from the outside + * world. */ + misfitAccounting(false) +{ +} + +/* Copy all graph data including transitions. 
*/ +FsmAp::FsmAp( const FsmAp &graph ) +: + ctx( graph.ctx ), + + priorInteraction(false), + + /* Lists start empty. Will be filled by copy. */ + stateList(), + misfitList(), + + /* Copy in the entry points, + * pointers will be resolved later. */ + entryPoints(graph.entryPoints), + startState(graph.startState), + errState(0), + + /* Will be filled by copy. */ + finStateSet(), + + /* Misfit accounting is only on during merging. */ + misfitAccounting(false) +{ + /* Create the states and record their map in the original state. */ + StateList::Iter origState = graph.stateList; + for ( ; origState.lte(); origState++ ) { + /* Make the new state. */ + StateAp *newState = new StateAp( *origState ); + + /* Add the state to the list. */ + stateList.append( newState ); + + /* Set the mapsTo item of the old state. */ + origState->alg.stateMap = newState; + } + + /* Dereference all the state maps. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + /* The toState points to the original in the src machine. The + * target's duplicate is in the state map. */ + StateAp *toState = trans->tdap()->toState != 0 ? + trans->tdap()->toState->alg.stateMap : 0; + + /* Attach the transition to the duplicate. The toState is + * cleared first because attachTrans asserts it is unset. */ + trans->tdap()->toState = 0; + attachTrans( state, toState, trans->tdap() ); + + } + else { + for ( CondList::Iter cti = trans->tcap()->condList; cti.lte(); cti++ ) { + /* The toState points to the original in the src machine. + * The target's duplicate is in the state map. */ + StateAp *toState = cti->toState != 0 ? cti->toState->alg.stateMap : 0; + + /* Attach the transition to the duplicate. */ + cti->toState = 0; + attachTrans( state, toState, cti ); + } + } + } + + /* Fix the eofTarg, if set. 
*/ + if ( state->eofTarget != 0 ) + state->eofTarget = state->eofTarget->alg.stateMap; + + /* Point the copied nfa transitions at the duplicates of their targets. */ + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *state->nfaOut; n.lte(); n++ ) { + StateAp *targ = n->toState->alg.stateMap; + n->toState = 0; + attachToNfa( state, targ, n ); + } + } + } + + /* Fix the state pointers in the entry points array. */ + EntryMapEl *eel = entryPoints.data; + for ( int e = 0; e < entryPoints.length(); e++, eel++ ) { + /* Get the duplicate of the state. */ + eel->value = eel->value->alg.stateMap; + + /* Foreign in transitions must be built up when duping machines so + * increment it here. */ + eel->value->foreignInTrans += 1; + } + + /* Fix the start state pointer and the new start state's count of in + * transitions. A source graph with no start state is copied with no + * start state instead of dereferencing a null pointer. */ + if ( startState != 0 ) { + startState = startState->alg.stateMap; + startState->foreignInTrans += 1; + } + + /* Build the final state set. */ + StateSet::Iter st = graph.finStateSet; + for ( ; st.lte(); st++ ) + finStateSet.insert((*st)->alg.stateMap); +} + +/* Deletes all transition data then deletes each state. */ +FsmAp::~FsmAp() +{ + /* Delete all the transitions. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Iterate the out transitions, deleting them. */ + for ( TransList::Iter n, t = state->outList; t.lte(); ) { + n = t.next(); + if ( t->plain() ) + delete t->tdap(); + else + delete t->tcap(); + t = n; + } + state->outList.abandon(); + + /* Only the in list object is freed here. Its elements are emptied + * through the nfaOut lists below. */ + if ( state->nfaIn != 0 ) { + delete state->nfaIn; + state->nfaIn = 0; + } + + if ( state->nfaOut != 0 ) { + state->nfaOut->empty(); + delete state->nfaOut; + state->nfaOut = 0; + } + } + + /* Delete all the states. */ + stateList.empty(); +} + +/* Set a state final. The state has its isFinState set to true and the state + * is added to the finStateSet. */ +void FsmAp::setFinState( StateAp *state ) +{ + /* Is it already a fin state. */ + if ( state->stateBits & STB_ISFINAL ) + return; + + state->stateBits |= STB_ISFINAL; + finStateSet.insert( state ); +} + +/* Set a state non-final. 
The state has its isFinState flag set false and the state + * is removed from the final state set. */ +void FsmAp::unsetFinState( StateAp *state ) +{ + /* Is it already a non-final state? */ + if ( ! (state->stateBits & STB_ISFINAL) ) + return; + + /* When a state loses its final state status it must relinquish all the + * properties that are allowed only for final states. */ + clearOutData( state ); + + state->stateBits &= ~ STB_ISFINAL; + finStateSet.remove( state ); +} + +/* Set and unset a state as the start state. */ +void FsmAp::setStartState( StateAp *state ) +{ + /* Should change from unset to set. */ + assert( startState == 0 ); + startState = state; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the head list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; +} + +/* Clear the start state, dropping the foreign in transition it accounted + * for. */ +void FsmAp::unsetStartState() +{ + /* Should change from set to unset. */ + assert( startState != 0 ); + + /* Decrement the entry's count of foreign entries. */ + startState->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( startState->foreignInTrans == 0 ) + misfitList.append( stateList.detach( startState ) ); + } + + startState = 0; +} + +/* Associate an id with a state. Makes the state a named entry point. Has no + * effect if the entry point is already mapped to the state. */ +void FsmAp::setEntry( int id, StateAp *state ) +{ + /* Insert the id into the state. If the state is already labelled with id, + * nothing to do. */ + if ( state->entryIds.insert( id ) ) { + /* Insert the entry and assert that it succeeds. 
*/ + entryPoints.insertMulti( id, state ); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the head list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; + } +} + +/* Remove the association of an id with a state. The state loses its entry + * point status. Assumes that the id is indeed mapped to state. */ +void FsmAp::unsetEntry( int id, StateAp *state ) +{ + /* Find the entry point in on id. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + + /* Advance to the record for this state. There is no bounds check, the + * search relies on the id being mapped to state. */ + while ( enLow->value != state ) + enLow += 1; + + /* Remove the record from the map. */ + entryPoints.remove( enLow ); + + /* Remove the state's sense of the link. */ + state->entryIds.remove( id ); + state->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( state->foreignInTrans == 0 ) + misfitList.append( stateList.detach( state ) ); + } +} + +/* Remove all association of an id with states. Assumes that the id is indeed + * mapped to a state. */ +void FsmAp::unsetEntry( int id ) +{ + /* Find the entry point in on id. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + + /* The range is inclusive of enHigh. */ + for ( EntryMapEl *mel = enLow; mel <= enHigh; mel++ ) { + /* Remove the state's sense of the link. */ + mel->value->entryIds.remove( id ); + mel->value->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit list. */ + if ( mel->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( mel->value ) ); + } + } + + /* Remove the records from the entry points map. 
*/ + entryPoints.removeMulti( enLow, enHigh ); +} + + +/* Retarget the entry point id from the state 'from' to the state 'to'. The + * entry ids and foreign in transition counts of both states are updated. + * Assumes that the id is indeed mapped to 'from'. */ +void FsmAp::changeEntry( int id, StateAp *to, StateAp *from ) +{ + /* Find the entry in the entry map. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != from ) + enLow += 1; + + /* Change it to the new target. */ + enLow->value = to; + + /* Remove from's sense of the link. */ + from->entryIds.remove( id ); + from->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( from->foreignInTrans == 0 ) + misfitList.append( stateList.detach( from ) ); + } + + /* Add to's sense of the link. The count only goes up if the id was not + * already present on the target. */ + if ( to->entryIds.insert( id ) != 0 ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the head list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + /* Up the foreign in transitions to the state. */ + to->foreignInTrans += 1; + } +} + + +/* Clear all entry points from a machine. */ +void FsmAp::unsetAllEntryPoints() +{ + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) { + /* Kill all the state's entry points at once. A state reached again + * through another entry has no ids left and is skipped. */ + if ( en->value->entryIds.length() > 0 ) { + en->value->foreignInTrans -= en->value->entryIds.length(); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit + * list. */ + if ( en->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( en->value ) ); + } + + /* Clear the set of ids out all at once. */ + en->value->entryIds.empty(); + } + } + + /* Now clear out the entry map all at once. */ + entryPoints.empty(); +} + +/* Assigning an epsilon transition into final states. 
*/ +void FsmAp::epsilonTrans( int id ) +{ + for ( StateSet::Iter fs = finStateSet; fs.lte(); fs++ ) + (*fs)->epsilonTrans.append( id ); +} + +/* Mark all states reachable from state. Traverses transitions forward. Used + * for removing states that have no path into them. */ +void FsmAp::markReachableFromHere( StateAp *state ) +{ + /* Base case: return; */ + if ( state->stateBits & STB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= STB_ISMARKED; + + /* Recurse on all out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + markReachableFromHere( trans->tdap()->toState ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + markReachableFromHere( cond->toState ); + } + } + } + + /* Recurse on all states that compose us. */ + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter st = *state->nfaOut; st.lte(); st++ ) + markReachableFromHere( st->toState ); + } + + if ( state->stateDictEl != 0 ) { + for ( StateSet::Iter ss = state->stateDictEl->stateSet; ss.lte(); ss++ ) + markReachableFromHere( *ss ); + } +} + +/* Any transitions to another state? */ +bool FsmAp::anyRegularTransitions( StateAp *state ) +{ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + StateAp *toState = trans->tdap()->toState; + if ( toState != 0 ) + return true; + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + StateAp *toState = cond->toState; + if ( toState != 0 ) + return true; + } + } + } + return false; +} + +void FsmAp::markReachableFromHereStopFinal( StateAp *state ) +{ + /* Base case: return; */ + if ( state->stateBits & STB_ISMARKED ) + return; + + /* Set this state as processed. 
We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= STB_ISMARKED; + + /* Recurse on all out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + StateAp *toState = trans->tdap()->toState; + if ( toState != 0 && !toState->isFinState() ) + markReachableFromHereStopFinal( toState ); + + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + StateAp *toState = cond->toState; + if ( toState != 0 && !toState->isFinState() ) + markReachableFromHereStopFinal( toState ); + } + } + } + + /* Recurse on all states that compose us. */ + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter st = *state->nfaOut; st.lte(); st++ ) + markReachableFromHereStopFinal( st->toState ); + } + + if ( state->stateDictEl != 0 ) { + for ( StateSet::Iter ss = state->stateDictEl->stateSet; ss.lte(); ss++ ) + markReachableFromHereStopFinal( *ss ); + } +} + +/* Mark all states reachable from state. Traverse transitions backwards. Used + * for removing dead end paths in graphs. */ +void FsmAp::markReachableFromHereReverse( StateAp *state ) +{ + /* Base case: return; */ + if ( state->stateBits & STB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states with + * transitions into this state. */ + state->stateBits |= STB_ISMARKED; + + /* Recurse on all items in transitions. */ + for ( TransInList::Iter t = state->inTrans; t.lte(); t++ ) + markReachableFromHereReverse( t->fromState ); + for ( CondInList::Iter t = state->inCond; t.lte(); t++ ) + markReachableFromHereReverse( t->fromState ); +} + +/* Determine if there are any entry points into a start state other than the + * start state. Setting starting transitions requires that the start state be + * isolated. In most cases a start state will already be isolated. 
bool FsmAp::isStartStateIsolated()
{
	/* Returns true only when the start state has no plain in transitions, no
	 * conditional in transitions and no entry ids. */

	/* If there are any in transitions then the state is not isolated. */
	if ( startState->inTrans.head != 0 )
		return false;
	if ( startState->inCond.head != 0 )
		return false;

	/* If there are any entry points then the state is not isolated either. */
	if ( startState->entryIds.length() > 0 )
		return false;

	return true;
}
*/ + for ( TransInList::Iter t = state->inTrans; t.lte(); t++ ) { + assert( t->toState == state ); + } + for ( CondInList::Iter t = state->inCond; t.lte(); t++ ) { + assert( t->toState == state ); + } + + count += 1; + } + + assert( stateList.length() == count ); +} + +void FsmAp::verifyReachability() +{ + /* Mark all the states that can be reached + * through the set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Check that everything got marked. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert it got marked and then clear the mark. */ + assert( st->stateBits & STB_ISMARKED ); + st->stateBits &= ~ STB_ISMARKED; + } +} + +void FsmAp::verifyNoDeadEndStates() +{ + /* Mark all states that have paths to the final states. */ + for ( StateSet::Iter pst = finStateSet; pst.lte(); pst++ ) + markReachableFromHereReverse( *pst ); + + /* Start state gets honorary marking. Must be done AFTER recursive call. */ + startState->stateBits |= STB_ISMARKED; + + /* Make sure everything got marked. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert the state got marked and unmark it. */ + assert( st->stateBits & STB_ISMARKED ); + st->stateBits &= ~ STB_ISMARKED; + } +} + +void FsmAp::depthFirstOrdering( StateAp *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->stateBits & STB_ONLIST ) + return; + + /* Doing depth first, put state on the list. */ + state->stateBits |= STB_ONLIST; + stateList.append( state ); + + /* Recurse on everything ranges. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) + depthFirstOrdering( trans->tdap()->toState ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) + depthFirstOrdering( cond->toState ); + } + } + } + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter s = *state->nfaOut; s.lte(); s++ ) + depthFirstOrdering( s->toState ); + } +} + +/* Ordering states by transition connections. */ +void FsmAp::depthFirstOrdering() +{ + /* Init on state list flags. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->stateBits &= ~STB_ONLIST; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + /* Add back to the state list from the start state and all other entry + * points. */ + if ( errState != 0 ) + depthFirstOrdering( errState ); + + depthFirstOrdering( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( en->value ); + + /* Make sure we put everything back on. */ + assert( stateListLen == stateList.length() ); +} + +/* Stable sort the states by final state status. */ +void FsmAp::sortStatesByFinal() +{ + /* Move forward through the list and move final states onto the end. */ + StateAp *state = 0; + StateAp *next = stateList.head; + StateAp *last = stateList.tail; + while ( state != last ) { + /* Move forward and load up the next. */ + state = next; + next = state->next; + + /* Throw to the end? */ + if ( state->isFinState() ) { + stateList.detach( state ); + stateList.append( state ); + } + } +} + +void FsmAp::setStateNumbers( int base ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->alg.stateNum = base++; +} + +bool FsmAp::checkErrTrans( StateAp *state, CondAp *trans ) +{ + /* Might go directly to error state. 
*/ + if ( trans->toState == 0 ) + return true; + + return false; +} + +bool FsmAp::checkErrTrans( StateAp *state, TransAp *trans ) +{ + /* + * Look for a gap between this transition and the previous. + */ + if ( trans->prev == 0 ) { + /* If this is the first transition. */ + if ( ctx->keyOps->lt( ctx->keyOps->minKey, trans->lowKey ) ) + return true; + } + else { + /* Not the first transition. Compare against the prev. */ + TransAp *prev = trans->prev; + Key nextKey = prev->highKey; + ctx->keyOps->increment( nextKey ); + if ( ctx->keyOps->lt( nextKey, trans->lowKey ) ) + return true; + } + + if ( trans->plain() ) { + if ( trans->tdap()->toState == 0 ) + return true; + } + else { + /* Check for gaps in the condition list. */ + if ( trans->tcap()->condList.length() < trans->condFullSize() ) + return true; + + /* Check all destinations. */ + for ( CondList::Iter cti = trans->tcap()->condList; cti.lte(); cti++ ) { + if ( checkErrTrans( state, cti ) ) + return true; + } + } + + return false; +} + +bool FsmAp::checkErrTransFinish( StateAp *state ) +{ + /* Check if there are any ranges already. */ + if ( state->outList.length() == 0 ) + return true; + else { + /* Get the last and check for a gap on the end. 
*/ + TransAp *last = state->outList.tail; + if ( ctx->keyOps->lt( last->highKey, ctx->keyOps->maxKey ) ) + return true; + } + return 0; +} + +bool FsmAp::hasErrorTrans() +{ + bool result; + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) { + result = checkErrTrans( st, tr ); + if ( result ) + return true; + } + result = checkErrTransFinish( st ); + if ( result ) + return true; + } + return false; +} diff --git a/libfsm/fsmcond.cc b/libfsm/fsmcond.cc new file mode 100644 index 00000000..b2339c12 --- /dev/null +++ b/libfsm/fsmcond.cc @@ -0,0 +1,520 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * Setting conditions and merging states with conditions are similar activities + * when expressed in code. The critical difference is that a merge is a union + * of multiple paths. 
We have to take both paths. Setting a condition, however, + * is a restriction. We have to expand the transition to follow both values of + * the condition, then remove the one that is not set. + */ + +#include "fsmgraph.h" +#include "mergesort.h" +#include "parsedata.h" + +#include <assert.h> +#include <iostream> + +long TransAp::condFullSize() + { return condSpace == 0 ? 1 : condSpace->fullSize(); } + +void FsmAp::expandCondKeys( CondKeySet &condKeys, CondSpace *fromSpace, + CondSpace *mergedSpace ) +{ + CondSet fromCS, mergedCS; + + if ( fromSpace != 0 ) + fromCS.insert( fromSpace->condSet ); + + if ( mergedSpace != 0 ) + mergedCS.insert( mergedSpace->condSet ); + + /* Need to transform condition element to the merged set. */ + for ( int cti = 0; cti < condKeys.length(); cti++ ) { + long origVal = condKeys[cti]; + long newVal = 0; + + /* Iterate the bit positions in the from set. */ + for ( CondSet::Iter csi = fromCS; csi.lte(); csi++ ) { + /* If set, find it in the merged set and flip the bit to 1. */ + if ( origVal & (1 << csi.pos()) ) { + /* The condition is set. Find the bit position in the merged + * set. */ + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + newVal |= 1 << bitPos; + } + } + + if ( origVal != newVal ) + condKeys[cti] = newVal; + } + + /* Need to double up the whole transition list for each condition test in + * merged that is not in from. The one we add has the bit in question set. 
+ * */ + for ( CondSet::Iter csi = mergedCS; csi.lte(); csi++ ) { + Action **cim = fromCS.find( *csi ); + if ( cim == 0 ) { + CondKeySet newItems; + newItems.append( condKeys ); + for ( int cti = 0; cti < condKeys.length(); cti++ ) { + int key = condKeys[cti] | (1 << csi.pos()); + newItems.insert( key ); + } + + condKeys.setAs( newItems ); + } + } +} + +void FsmAp::expandConds( StateAp *fromState, TransAp *trans, + CondSpace *fromSpace, CondSpace *mergedSpace ) +{ + CondSet fromCS, mergedCS; + + if ( fromSpace != 0 ) + fromCS.insert( fromSpace->condSet ); + + if ( mergedSpace != 0 ) + mergedCS.insert( mergedSpace->condSet ); + + /* Need to transform condition element to the merged set. */ + for ( CondList::Iter cti = trans->tcap()->condList; cti.lte(); cti++ ) { + long origVal = cti->key.getVal(); + long newVal = 0; + + /* Iterate the bit positions in the from set. */ + for ( CondSet::Iter csi = fromCS; csi.lte(); csi++ ) { + /* If set, find it in the merged set and flip the bit to 1. */ + if ( origVal & (1 << csi.pos()) ) { + /* The condition is set. Find the bit position in the merged + * set. */ + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + newVal |= 1 << bitPos; + } + } + + if ( origVal != newVal ) + cti->key = newVal; + } + + /* Need to double up the whole transition list for each condition test in + * merged that is not in from. The one we add has the bit in question set. + * */ + for ( CondSet::Iter csi = mergedCS; csi.lte(); csi++ ) { + Action **cim = fromCS.find( *csi ); + if ( cim == 0 ) { + CondList newItems; + for ( CondList::Iter cti = trans->tcap()->condList; cti.lte(); cti++ ) { + /* Sub-transition for conditions. */ + CondAp *cond = new CondAp( trans ); + + /* Attach only if our caller wants the expanded transitions + * attached. */ + attachTrans( fromState, cti->toState, cond ); + + /* Call the user callback to add in the original source transition. 
*/ + addInTrans( cond, cti.ptr ); + + cond->key = cti->key.getVal() | (1 << csi.pos()); + + newItems.append( cond ); + } + + /* Merge newItems in. Both the condList and newItems are sorted. Make + * a sorted list out of them. */ + CondAp *dest = trans->tcap()->condList.head; + while ( dest != 0 && newItems.head != 0 ) { + if ( newItems.head->key.getVal() > dest->key.getVal() ) { + dest = dest->next; + } + else { + /* Pop the item for insertion. */ + CondAp *ins = newItems.detachFirst(); + trans->tcap()->condList.addBefore( dest, ins ); + } + } + + /* Append the rest of the items. */ + trans->tcap()->condList.append( newItems ); + } + } +} + +CondSpace *FsmAp::expandCondSpace( TransAp *destTrans, TransAp *srcTrans ) +{ + CondSet destCS, srcCS; + CondSet mergedCS; + + if ( destTrans->condSpace != 0 ) + destCS.insert( destTrans->condSpace->condSet ); + + if ( srcTrans->condSpace != 0 ) + srcCS.insert( srcTrans->condSpace->condSet ); + + mergedCS.insert( destCS ); + mergedCS.insert( srcCS ); + + return addCondSpace( mergedCS ); +} + +StateAp *FsmAp::copyStateForExpansion( StateAp *srcState ) +{ + StateAp *newState = new StateAp(); + newState->outCondSpace = srcState->outCondSpace; + newState->outCondKeys = srcState->outCondKeys; + return newState; +} + +void FsmAp::mergeOutConds( StateAp *destState, StateAp *srcState, bool leaving ) +{ + if ( destState == srcState ) + return; + + bool bothFinal = destState->isFinState() && srcState->isFinState(); + bool unionOp = !leaving; + + CondSet destCS, srcCS; + CondSet mergedCS; + + if ( destState->outCondSpace != 0 ) + destCS.insert( destState->outCondSpace->condSet ); + + if ( srcState->outCondSpace != 0 ) + srcCS.insert( srcState->outCondSpace->condSet ); + + mergedCS.insert( destCS ); + mergedCS.insert( srcCS ); + + if ( mergedCS.length() > 0 ) { + CondSpace *mergedSpace = addCondSpace( mergedCS ); + + CondSpace *srcSpace = srcState->outCondSpace; + CondKeySet srcKeys = srcState->outCondKeys; + + if ( srcSpace != mergedSpace 
) { + /* Prep the key list with zero item if necessary. */ + if ( srcSpace == 0 ) + srcKeys.append( 0 ); + + expandCondKeys( srcKeys, srcSpace, mergedSpace ); + } + + if ( destState->outCondSpace != mergedSpace ) { + /* Prep the key list with zero item if necessary. */ + if ( destState->outCondSpace == 0 ) + destState->outCondKeys.append( 0 ); + + /* Now expand the dest. */ + expandCondKeys( destState->outCondKeys, destState->outCondSpace, mergedSpace ); + } + + destState->outCondSpace = mergedSpace; + + if ( unionOp && bothFinal ) { + /* Keys can come from either. */ + for ( CondKeySet::Iter c = srcKeys; c.lte(); c++ ) + destState->outCondKeys.insert( *c ); + } + else { + /* Keys need to be in both sets. */ + for ( long c = 0; c < destState->outCondKeys.length(); ) { + if ( !srcKeys.find( destState->outCondKeys[c] ) ) + destState->outCondKeys.CondKeyVect::remove( c, 1 ); + else + c++; + } + } + } +} + +CondSpace *FsmAp::addCondSpace( const CondSet &condSet ) +{ + CondSpace *condSpace = ctx->condData->condSpaceMap.find( condSet ); + if ( condSpace == 0 ) { + condSpace = new CondSpace( condSet ); + ctx->condData->condSpaceMap.insert( condSpace ); + } + return condSpace; +} + +TransDataAp *FsmAp::convertToTransAp( StateAp *from, CondAp *cond ) +{ + TransDataAp *newTrans = new TransDataAp(); + newTrans->lowKey = cond->transAp->lowKey; + newTrans->highKey = cond->transAp->highKey; + + newTrans->lmActionTable.setActions( cond->lmActionTable ); + newTrans->actionTable.setActions( cond->actionTable ); + newTrans->priorTable.setPriors( cond->priorTable ); + + attachTrans( from, cond->toState, newTrans ); + + /* Detach in list. 
/* Rewrite the out list of state so that it holds no plain transitions: each
 * plain (TransDataAp) element is replaced by the TransCondAp that the
 * two-argument convertToCondAp builds for it; elements that are already
 * conditional are carried over unchanged. Element order is preserved. */
void FsmAp::convertToCondAp( StateAp *state )
{
	/* First replace TransDataAp with cond versions. */
	TransList destList;
	for ( TransList::Iter tr = state->outList; tr.lte(); ) {
		/* Grab the successor up front: the two-argument convertToCondAp
		 * deletes the plain transition tr points at. */
		TransList::Iter next = tr.next();
		if ( tr->plain() ) {
			TransCondAp *newTrans = convertToCondAp( state, tr->tdap() );
			destList.append( newTrans );
		}
		else {
			destList.append( tr );
		}

		tr = next;
	}

	/* The elements now live on destList. Drop the old list's bookkeeping
	 * with abandon() and take over destList's contents. */
	state->outList.abandon();
	state->outList.transfer( destList );
}
/* Embed a condition set, with the given accepted key values, into the out
 * transitions of state in fsm. The work is done by doEmbedCondition under
 * misfit accounting, followed by fillInStates and a misfit sweep. Returns the
 * FsmRes produced by fillInStates. */
FsmRes FsmAp::embedCondition( FsmAp *fsm, StateAp *state, const CondSet &set, const CondKeySet &vals )
{
	/* Turn on misfit accounting to possibly catch the old start state. */
	fsm->setMisfitAccounting( true );

	/* Worker. */
	fsm->doEmbedCondition( state, set, vals );

	/* Fill in any states that were newed up as combinations of others. */
	FsmRes res = fillInStates( fsm );
	/* NOTE(review): on failure we return with misfit accounting still
	 * enabled and without removing misfits -- presumably the caller discards
	 * the machine in that case; confirm. */
	if ( !res.success() )
		return res;

	/* Remove the misfits and turn off misfit accounting. */
	fsm->removeMisfits();
	fsm->setMisfitAccounting( false );

	return res;
}
*/ + for ( CondSet::Iter csi = origCS; csi.lte(); csi++ ) { + /* If set, find it in the merged set and flip the bit to 1. If + * not set, there is nothing to do (convenient eh?) */ + if ( origVal & (1 << csi.pos()) ) { + /* The condition is set. Find the bit position in the + * merged set. */ + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + newVal |= 1 << bitPos; + } + } + + if ( origVal != newVal ) + state->outCondKeys[cti] = newVal; + + /* Now set the new bit appropriately. Since it defaults to zero we + * only take action if sense is positive. */ + if ( sense ) { + Action **cim = mergedCS.find( condAction ); + int pos = cim - mergedCS.data; + state->outCondKeys[cti] = state->outCondKeys[cti] | (1 << pos); + } + } + } +} diff --git a/libfsm/fsmgraph.cc b/libfsm/fsmgraph.cc new file mode 100644 index 00000000..819bfa96 --- /dev/null +++ b/libfsm/fsmgraph.cc @@ -0,0 +1,1948 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <iostream> + +#include "fsmgraph.h" +#include "mergesort.h" +#include "action.h" + +using std::endl; + +Action::~Action() +{ + /* If we were created by substitution of another action then we don't own the inline list. */ + if ( substOf == 0 && inlineList != 0 ) { + inlineList->empty(); + delete inlineList; + inlineList = 0; + } +} + +InlineItem::~InlineItem() +{ + if ( children != 0 ) { + children->empty(); + delete children; + } +} + +/* Make a new state. The new state will be put on the graph's + * list of state. The new state can be created final or non final. */ +StateAp *FsmAp::addState() +{ + /* Make the new state to return. */ + StateAp *state = new StateAp(); + + if ( misfitAccounting ) { + /* Create the new state on the misfit list. All states are created + * with no foreign in transitions. */ + misfitList.append( state ); + } + else { + /* Create the new state. */ + stateList.append( state ); + } + + return state; +} + +/* Construct an FSM that is the concatenation of an array of characters. A new + * machine will be made that has len+1 states with one transition between each + * state for each integer in str. IsSigned determines if the integers are to + * be considered as signed or unsigned ints. */ +FsmAp *FsmAp::concatFsm( FsmCtx *ctx, Key *str, int len ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Make the first state and set it as the start state. */ + StateAp *last = fsm->addState(); + fsm->setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + StateAp *newState = fsm->addState(); + fsm->attachNewTrans( last, newState, str[i], str[i] ); + last = newState; + } + + /* Make the last state the final state. 
*/ + fsm->setFinState( last ); + + return fsm; +} + +/* Case insensitive version of concatFsm. */ +FsmAp *FsmAp::concatFsmCI( FsmCtx *ctx, Key *str, int len ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Make the first state and set it as the start state. */ + StateAp *last = fsm->addState(); + fsm->setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + StateAp *newState = fsm->addState(); + + KeySet keySet( ctx->keyOps ); + if ( str[i].isLower() ) + keySet.insert( str[i].toUpper() ); + if ( str[i].isUpper() ) + keySet.insert( str[i].toLower() ); + keySet.insert( str[i] ); + + for ( int i = 0; i < keySet.length(); i++ ) + fsm->attachNewTrans( last, newState, keySet[i], keySet[i] ); + + last = newState; + } + + /* Make the last state the final state. */ + fsm->setFinState( last ); + + return fsm; +} + + +/* Construct a machine that matches one character. A new machine will be made + * that has two states with a single transition between the states. */ +FsmAp *FsmAp::concatFsm( FsmCtx *ctx, Key chr ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Two states first start, second final. */ + fsm->setStartState( fsm->addState() ); + + StateAp *end = fsm->addState(); + fsm->setFinState( end ); + + /* Attach on the character. */ + fsm->attachNewTrans( fsm->startState, end, chr, chr ); + + return fsm; +} + +/* Case insensitive version of single-char concat FSM. */ +FsmAp *FsmAp::concatFsmCI( FsmCtx *ctx, Key chr ) +{ + return concatFsmCI( ctx, &chr, 1 ); +} + + +/* Construct a machine that matches any character in set. A new machine will + * be made that has two states and len transitions between the them. The set + * should be ordered correctly accroding to KeyOps and should not contain + * any duplicates. */ +FsmAp *FsmAp::orFsm( FsmCtx *ctx, Key *set, int len ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Two states first start, second final. 
/* Construct a machine that matches a range of characters. A new machine will
 * be made with two states and a range transition between them. The range will
 * match any characters from low to high inclusive. Low should be less than or
 * equal to high otherwise undefined behaviour results. Key ordering is
 * whatever the ctx's keyOps define; there is no separate signedness
 * parameter. */
FsmAp *FsmAp::rangeFsm( FsmCtx *ctx, Key low, Key high )
{
	FsmAp *fsm = new FsmAp( ctx );

	/* Two states first start, second final. */
	fsm->setStartState( fsm->addState() );

	StateAp *end = fsm->addState();
	fsm->setFinState( end );

	/* Attach using the range of characters. */
	fsm->attachNewTrans( fsm->startState, end, low, high );

	return fsm;
}
*/ + if ( ctx->keyOps->lt( ctx->keyOps->minKey, low ) ) { + ctx->keyOps->decrement( low ); + fsm->attachNewTrans( fsm->startState, end, ctx->keyOps->minKey, low ); + } + + if ( ctx->keyOps->lt( high, ctx->keyOps->maxKey ) ) { + ctx->keyOps->increment( high ); + fsm->attachNewTrans( fsm->startState, end, high, ctx->keyOps->maxKey ); + } + + return fsm; +} + + +/* Case insensitive range machine. Starts from rangeFsm( lowKey, highKey ), + * then unions in the upper-case image of the part of the range that overlaps + * 'a'..'z' and the lower-case image of the part that overlaps 'A'..'Z'. + * NOTE(review): the FsmRes returned by unionOp is used without checking + * success() -- confirm that a failed union cannot occur here. */ +FsmAp *FsmAp::rangeFsmCI( FsmCtx *ctx, Key lowKey, Key highKey ) +{ + FsmAp *retFsm = rangeFsm( ctx, lowKey, highKey ); + + /* Union the portion that covers alphas. */ + if ( lowKey.getVal() <= 'z' ) { + /* Clamp the range to 'a'..'z'. */ + int low, high; + if ( lowKey.getVal() <= 'a' ) + low = 'a'; + else + low = lowKey.getVal(); + + if ( highKey.getVal() >= 'a' ) { + if ( highKey.getVal() >= 'z' ) + high = 'z'; + else + high = highKey.getVal(); + + /* Add in upper(low) .. upper(high) */ + + FsmAp *addFsm = FsmAp::rangeFsm( ctx, toupper(low), toupper(high) ); + FsmRes res = FsmAp::unionOp( retFsm, addFsm ); + retFsm = res.fsm; + } + } + + if ( lowKey.getVal() <= 'Z' ) { + /* Clamp the range to 'A'..'Z'. */ + int low, high; + if ( lowKey.getVal() <= 'A' ) + low = 'A'; + else + low = lowKey.getVal(); + + if ( highKey.getVal() >= 'A' ) { + if ( highKey.getVal() >= 'Z' ) + high = 'Z'; + else + high = highKey.getVal(); + + /* Add in lower(low) .. lower(high) */ + FsmAp *addFsm = FsmAp::rangeFsm( ctx, tolower(low), tolower(high) ); + FsmRes res = FsmAp::unionOp( retFsm, addFsm ); + retFsm = res.fsm; + } + } + + return retFsm; +} + +/* Construct a machine that matches a repeated range of characters: zero or + * more keys, each between low and high. */ +FsmAp *FsmAp::rangeStarFsm( FsmCtx *ctx, Key low, Key high ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* One state which is final and is the start state. */ + fsm->setStartState( fsm->addState() ); + fsm->setFinState( fsm->startState ); + + /* Attach start to start using range of characters. */ + fsm->attachNewTrans( fsm->startState, fsm->startState, low, high ); + + return fsm; +} + +/* Construct a machine that matches the empty string. A new machine will be + * made with only one state. 
The new state will be both a start and final + * state. IsSigned determines if the machine has a signed or unsigned + * alphabet. Fsm operations must be done on machines with the same alphabet + * signedness. */ +FsmAp *FsmAp::lambdaFsm( FsmCtx *ctx ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Give it one state with no transitions making it + * the start state and final state. */ + fsm->setStartState( fsm->addState() ); + fsm->setFinState( fsm->startState ); + + return fsm; +} + +/* Construct a machine that matches nothing at all. A new machine will be + * made with only one state. It will not be final. */ +FsmAp *FsmAp::emptyFsm( FsmCtx *ctx ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Give it one state with no transitions and make it the start state. + * The state is not set final, so the machine accepts nothing. */ + fsm->setStartState( fsm->addState() ); + + return fsm; +} + +/* Apply srcState's outActionTable and outPriorTable (via setActions and + * setPriors) to every transition leaving destState that has a target state, + * covering both plain transitions and each condition of conditional ones. + * Every NFA out transition of destState is then handed to + * transferOutToNfaTrans together with srcState. */ +void FsmAp::transferOutData( StateAp *destState, StateAp *srcState ) +{ + for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) { + /* Get the actions data from the outActionTable. */ + trans->tdap()->actionTable.setActions( srcState->outActionTable ); + + /* Get the priorities from the outPriorTable. */ + trans->tdap()->priorTable.setPriors( srcState->outPriorTable ); + } + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) { + /* Get the actions data from the outActionTable. */ + cond->actionTable.setActions( srcState->outActionTable ); + + /* Get the priorities from the outPriorTable. */ + cond->priorTable.setPriors( srcState->outPriorTable ); + } + } + } + } + + if ( destState->nfaOut != 0 ) { + for ( NfaTransList::Iter na = *destState->nfaOut; na.lte(); na++ ) + transferOutToNfaTrans( na, srcState ); + } +} + +/* Union worker used by union, set diff (subtract) and intersection. 
*/ +FsmRes FsmAp::doUnion( FsmAp *fsm, FsmAp *other ) +{ + /* Build a state set consisting of both start states */ + StateSet startStateSet; + startStateSet.insert( fsm->startState ); + startStateSet.insert( other->startState ); + + /* Both of the original start states lose their start state status. */ + fsm->unsetStartState(); + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + fsm->copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other + * into fsm. No states will be deleted. */ + fsm->stateList.append( other->stateList ); + fsm->misfitList.append( other->misfitList ); + + /* Move the final set data from other into fsm. */ + fsm->finStateSet.insert(other->finStateSet); + other->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Create a new start state. */ + fsm->setStartState( fsm->addState() ); + + /* Merge the two original start states into the new start state. */ + fsm->mergeStateList( fsm->startState, startStateSet.data, startStateSet.length() ); + + /* Fill in any new states made from merging. */ + return fillInStates( fsm ); +} + +/* Return true if state is the targ of some element of eptVect. A null + * eptVect is treated as containing nothing. */ +bool FsmAp::inEptVect( EptVect *eptVect, StateAp *state ) +{ + if ( eptVect != 0 ) { + /* Vect is there, walk it looking for state. */ + for ( int i = 0; i < eptVect->length(); i++ ) { + if ( eptVect->data[i].targ == state ) + return true; + } + } + return false; +} + +/* Fill epsilon vectors in a root state from a given starting point. Employs + * a depth first search through the graph of epsilon transitions. */ +void FsmAp::epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving ) +{ + /* Walk the epsilon transitions out of the state. */ + for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) { + /* Find the entry point, if the it does not resove, ignore it. 
*/ + EntryMapEl *enLow, *enHigh; + if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) { + /* Loop the targets. */ + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) { + /* Skip the state currently being expanded (from) and any state + * already recorded in root's eptVect. + * NOTE(review): the test compares against from, not root. The + * two are the same state only on the initial call made by + * resolveEpsilonTrans; confirm whether root itself should also + * be excluded on recursive calls. */ + StateAp *targ = en->value; + if ( targ != from && !inEptVect(root->eptVect, targ) ) { + /* Maybe need to create the eptVect. */ + if ( root->eptVect == 0 ) + root->eptVect = new EptVect(); + + /* If moving to a different graph or if any parent is + * leaving then we are leaving. */ + bool leaving = parentLeaving || + root->owningGraph != targ->owningGraph; + + /* All ok, add the target epsilon and recurse. */ + root->eptVect->append( EptVectEl(targ, leaving) ); + epsilonFillEptVectFrom( root, targ, leaving ); + } + } + } + } +} + +/* For every eptVect target that itself owns an eptVect (so it is both read + * from and written to), make a shadow copy once with addState/mergeStates, + * remember it in isolatedShadow, and point the eptVect entry at the shadow. */ +void FsmAp::shadowReadWriteStates() +{ + /* Init isolatedShadow algorithm data. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->isolatedShadow = 0; + + /* Any states that may be both read from and written to must + * be shadowed. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Find such states by looping through the eptVect lists, which give us + * the states that will be read from. May cause us to visit the states + * that we are interested in more than once. */ + if ( st->eptVect != 0 ) { + /* For all states that will be read from. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + /* Check for read and write to the same state. */ + StateAp *targ = ept->targ; + if ( targ->eptVect != 0 ) { + /* State is to be written to, if the shadow is not already + * there, create it. */ + if ( targ->isolatedShadow == 0 ) { + StateAp *shadow = addState(); + mergeStates( shadow, targ ); + targ->isolatedShadow = shadow; + } + + /* Write shadow into the state vector so that it is the + * state that the epsilon transition will read from. 
*/ + ept->targ = targ->isolatedShadow; + } + } + } + } +} + +void FsmAp::resolveEpsilonTrans() +{ + /* Walk the state list and invoke recursive worker on each state. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + epsilonFillEptVectFrom( st, st, false ); + + /* Prevent reading from and writing to of the same state. */ + shadowReadWriteStates(); + + /* For all states that have epsilon transitions out, draw the transitions, + * clear the epsilon transitions. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* If there is a state vector, then create the pre-merge state. */ + if ( st->eptVect != 0 ) { + /* Merge all the epsilon targets into the state. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + if ( ept->leaving ) + mergeStatesLeaving( st, ept->targ ); + else + mergeStates( st, ept->targ ); + } + + /* Clean up the target list. */ + delete st->eptVect; + st->eptVect = 0; + } + + /* Clear the epsilon transitions vector. */ + st->epsilonTrans.empty(); + } +} + +FsmRes FsmAp::applyNfaTrans( FsmAp *fsm, StateAp *fromState, StateAp *toState, NfaTrans *nfaTrans ) +{ + fsm->setMisfitAccounting( true ); + + fsm->mergeStates( fromState, toState, false ); + + /* Epsilons can caused merges which leave behind unreachable states. */ + FsmRes res = FsmAp::fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Can nuke the epsilon transition that we will never + * follow. */ + fsm->detachFromNfa( fromState, toState, nfaTrans ); + fromState->nfaOut->detach( nfaTrans ); + delete nfaTrans; + + if ( fromState->nfaOut->length() == 0 ) { + delete fromState->nfaOut; + fromState->nfaOut = 0; + } + + /* Remove the misfits and turn off misfit accounting. 
*/ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + return FsmRes( FsmRes::Fsm(), fsm ); +} + +void FsmAp::globOp( FsmAp **others, int numOthers ) +{ + for ( int m = 0; m < numOthers; m++ ) { + assert( ctx == others[m]->ctx ); + } + + /* All other machines loose start states status. */ + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. */ + stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } +} + +/* Used near the end of an fsm construction. Any labels that are still around + * are referenced only by gotos and calls and they need to be made into + * deterministic entry points. */ +void FsmAp::deterministicEntry() +{ + /* States may loose their entry points, turn on misfit accounting. */ + setMisfitAccounting( true ); + + /* Get a copy of the entry map then clear all the entry points. As we + * iterate the old entry map finding duplicates we will add the entry + * points for the new states that we create. */ + EntryMap prevEntry = entryPoints; + unsetAllEntryPoints(); + + for ( int enId = 0; enId < prevEntry.length(); ) { + /* Count the number of states on this entry key. */ + int highId = enId; + while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key ) + highId += 1; + + int numIds = highId - enId; + if ( numIds == 1 ) { + /* Only a single entry point, just set the entry. 
*/ + setEntry( prevEntry[enId].key, prevEntry[enId].value ); + } + else { + /* Multiple entry points, need to create a new state and merge in + * all the targets of entry points. */ + StateAp *newEntry = addState(); + for ( int en = enId; en < highId; en++ ) + mergeStates( newEntry, prevEntry[en].value ); + + /* Add the new state as the single entry point. */ + setEntry( prevEntry[enId].key, newEntry ); + } + + enId += numIds; + } + + /* The old start state may be unreachable. Remove the misfits and turn off + * misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Unset any final states that are no longer to be final due to final bits. */ +void FsmAp::unsetKilledFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for killing bit. */ + StateAp *state = fin.data[s]; + if ( state->stateBits & STB_GRAPH1 ) { + /* One final state is a killer, set to non-final. */ + unsetFinState( state ); + } + + /* Clear all killing bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~STB_GRAPH1; + } +} + +/* Unset any final states that are no longer to be final due to final bits. */ +void FsmAp::unsetIncompleteFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for one set but not the other. */ + StateAp *state = fin.data[s]; + if ( state->stateBits & STB_BOTH && + (state->stateBits & STB_BOTH) != STB_BOTH ) + { + /* One state wants the other but it is not there. */ + unsetFinState( state ); + } + + /* Clear wanting bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~STB_BOTH; + } +} + +/* Kleene star operator. Makes this machine the kleene star of itself. 
Any + * transitions made going out of the machine and back into itself will be + * notified that they are leaving transitions by having the leavingFromState + * callback invoked. */ +FsmRes FsmAp::starOp( FsmAp *fsm ) +{ + /* The start func orders need to be shifted before doing the star. */ + fsm->ctx->curActionOrd += fsm->shiftStartActionOrder( fsm->ctx->curActionOrd ); + + /* Turn on misfit accounting to possibly catch the old start state. */ + fsm->setMisfitAccounting( true ); + + /* Create the new new start state. It will be set final after the merging + * of the final states with the start state is complete. */ + StateAp *prevStartState = fsm->startState; + fsm->unsetStartState(); + fsm->setStartState( fsm->addState() ); + + /* Merge the new start state with the old one to isolate it. */ + fsm->mergeStates( fsm->startState, prevStartState ); + + if ( !fsm->startState->isFinState() ) { + /* Common case, safe to merge. */ + for ( StateSet::Iter st = fsm->finStateSet; st.lte(); st++ ) + fsm->mergeStatesLeaving( *st, fsm->startState ); + } + else { + /* Merge the start state into all final states. Except the start state on + * the first pass. If the start state is set final we will be doubling up + * its transitions, which will get transfered to any final states that + * follow it in the final state set. This will be determined by the order + * of items in the final state set. To prevent this we just merge with the + * start on a second pass. */ + StateSet origFin = fsm->finStateSet; + for ( StateSet::Iter st = origFin; st.lte(); st++ ) { + if ( *st != fsm->startState ) + fsm->mergeStatesLeaving( *st, fsm->startState ); + } + + /* Now it is safe to merge the start state with itself (provided it + * is set final). */ + if ( fsm->startState->isFinState() ) + fsm->mergeStatesLeaving( fsm->startState, fsm->startState ); + } + + /* Now ensure the new start state is a final state. 
*/ + fsm->setFinState( fsm->startState ); + + /* Fill in any states that were newed up as combinations of others. */ + FsmRes res = FsmAp::fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + fsm->afterOpMinimize(); + + return res; +} + +FsmRes FsmAp::plusOp( FsmAp *fsm ) +{ + /* Need a duplicate for the star end. */ + FsmAp *factorDup = new FsmAp( *fsm ); + + /* Star the duplicate. */ + FsmRes res1 = FsmAp::starOp( factorDup ); + if ( !res1.success() ) + return res1; + + FsmRes res2 = FsmAp::concatOp( fsm, res1.fsm ); + if ( !res2.success() ) + return res2; + + return res2; +} + +FsmRes FsmAp::questionOp( FsmAp *fsm ) +{ + /* Make the null fsm. */ + FsmAp *nu = FsmAp::lambdaFsm( fsm->ctx ); + + /* Perform the question operator. */ + FsmRes res = FsmAp::unionOp( fsm, nu ); + if ( !res.success() ) + return res; + + return res; +} + +FsmRes FsmAp::exactRepeatOp( FsmAp *fsm, int times ) +{ + /* Zero repetitions produces lambda machine. */ + if ( times == 0 ) { + FsmCtx *fsmCtx = fsm->ctx; + delete fsm; + return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( fsmCtx ) ); + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + fsm->ctx->curActionOrd += fsm->shiftStartActionOrder( fsm->ctx->curActionOrd ); + + /* A repeat of one does absolutely nothing. */ + if ( times == 1 ) + return FsmRes( FsmRes::Fsm(), fsm ); + + /* Make a machine to make copies from. */ + FsmAp *copyFrom = new FsmAp( *fsm ); + + /* Concatentate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + FsmAp *dup = new FsmAp( *copyFrom ); + FsmRes res = concatOp( fsm, dup ); + if ( !res.success() ) { + delete copyFrom; + return res; + } + } + + /* Now use the copyFrom on the end. 
*/ + FsmRes res = concatOp( fsm, copyFrom ); + if ( !res.success()) + return res; + + res.fsm->afterOpMinimize(); + + return res; +} + +FsmRes FsmAp::maxRepeatOp( FsmAp *fsm, int times ) +{ + /* Zero repetitions produces lambda machine. */ + if ( times == 0 ) { + FsmCtx *fsmCtx = fsm->ctx; + delete fsm; + return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( fsmCtx ) ); + } + + fsm->ctx->curActionOrd += fsm->shiftStartActionOrder( fsm->ctx->curActionOrd ); + + /* A repeat of one optional merely allows zero string. */ + if ( times == 1 ) { + isolateStartState( fsm ); + fsm->setFinState( fsm->startState ); + return FsmRes( FsmRes::Fsm(), fsm ); + } + + /* Make a machine to make copies from. */ + FsmAp *copyFrom = new FsmAp( *fsm ); + + /* The state set used in the from end of the concatentation. Starts with + * the initial final state set, then after each concatenation, gets set to + * the the final states that come from the the duplicate. */ + StateSet lastFinSet( fsm->finStateSet ); + + /* Set the initial state to zero to allow zero copies. */ + isolateStartState( fsm ); + fsm->setFinState( fsm->startState ); + + /* Concatentate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + /* Make a duplicate for concating and set the fin bits to graph 2 so we + * can pick out it's final states after the optional style concat. */ + FsmAp *dup = new FsmAp( *copyFrom ); + dup->setFinBits( STB_GRAPH2 ); + FsmRes res = concatOp( fsm, dup, false, &lastFinSet, true ); + if ( !res.success() ) { + delete copyFrom; + return res; + } + + /* Clear the last final state set and make the new one by taking only + * the final states that come from graph 2.*/ + lastFinSet.empty(); + for ( int i = 0; i < fsm->finStateSet.length(); i++ ) { + /* If the state came from graph 2, add it to the last set and clear + * the bits. 
*/ + StateAp *fs = fsm->finStateSet[i]; + if ( fs->stateBits & STB_GRAPH2 ) { + lastFinSet.insert( fs ); + fs->stateBits &= ~STB_GRAPH2; + } + } + } + + /* Now use the copyFrom on the end, no bits set, no bits to clear. */ + FsmRes res = concatOp( fsm, copyFrom, false, &lastFinSet, true ); + if ( !res.success() ) + return res; + + res.fsm->afterOpMinimize(); + + return res; +} + +FsmRes FsmAp::minRepeatOp( FsmAp *fsm, int times ) +{ + if ( times == 0 ) { + /* Acts just like a star op on the machine to return. */ + return FsmAp::starOp( fsm ); + } + else { + /* Take a duplicate for the star below. */ + FsmAp *dup = new FsmAp( *fsm ); + + /* Do repetition on the first half. */ + FsmRes exact = FsmAp::exactRepeatOp( fsm, times ); + if ( !exact.success() ) { + delete dup; + return exact; + } + + /* Star the duplicate. */ + FsmRes star = FsmAp::starOp( dup ); + if ( !star.success() ) { + delete exact.fsm; + return star; + } + + /* Tack on the kleene star. */ + return FsmAp::concatOp( exact.fsm, star.fsm ); + } +} + +FsmRes FsmAp::rangeRepeatOp( FsmAp *fsm, int lowerRep, int upperRep ) +{ + if ( lowerRep == 0 && upperRep == 0 ) { + FsmCtx *fsmCtx = fsm->ctx; + delete fsm; + return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( fsmCtx ) ); + } + else if ( lowerRep == 0 ) { + /* Just doing max repetition. Already guarded against n == 0. */ + return FsmAp::maxRepeatOp( fsm, upperRep ); + } + else if ( lowerRep == upperRep ) { + /* Just doing exact repetition. Already guarded against n == 0. */ + return FsmAp::exactRepeatOp( fsm, lowerRep ); + } + else { + /* This is the case that 0 < lowerRep < upperRep. Take a + * duplicate for the optional repeat. */ + FsmAp *dup = new FsmAp( *fsm ); + + /* Do repetition on the first half. */ + FsmRes exact = FsmAp::exactRepeatOp( fsm, lowerRep ); + if ( !exact.success() ) { + delete dup; + return exact; + } + + /* Do optional repetition on the second half. 
*/ + FsmRes optional = FsmAp::maxRepeatOp( dup, upperRep - lowerRep ); + if ( !optional.success() ) { + delete exact.fsm; + return optional; + } + + /* Concat two halves. */ + return FsmAp::concatOp( exact.fsm, optional.fsm ); + } +} + +/* Concatenates other to the end of fsm. Other is deleted. Any + * transitions made leaving fsm and entering into other are notified + * that they are leaving transitions by having the leavingFromState callback + * invoked. Supports specifying the fromStates (instead of the first machine's + * final state set). This is useful for a max-repeat scenario, where from + * states are not all of first's final states. Also supports treating the + * concatenation as optional, which leaves the final states of the first + * machine as final. */ +FsmRes FsmAp::concatOp( FsmAp *fsm, FsmAp *other, bool lastInSeq, StateSet *fromStates, bool optional ) +{ + /* For each guarded-in priority on other's start state, apply the + * descriptor to all of fsm's transitions and its paired descriptor + * (desc->other) to all of other's transitions. */ + for ( PriorTable::Iter g = other->startState->guardedInTable; g.lte(); g++ ) { + fsm->allTransPrior( 0, g->desc ); + other->allTransPrior( 0, g->desc->other ); + } + + /* Both machines must belong to the same context. */ + assert( fsm->ctx == other->ctx ); + + /* For the merging process. NOTE(review): startStateSet is not referenced + * anywhere else in this function. */ + StateSet finStateSetCopy, startStateSet; + + /* Turn on misfit accounting for both graphs. */ + fsm->setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Get the other's start state. */ + StateAp *otherStartState = other->startState; + + /* Unset other's start state before bringing in the entry points. */ + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + fsm->copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Bring in other's states into our state lists. */ + fsm->stateList.append( other->stateList ); + fsm->misfitList.append( other->misfitList ); + + /* If from states is not set, then get a copy of our final state set before + * we clobber it and use it instead. 
*/ + if ( fromStates == 0 ) { + finStateSetCopy = fsm->finStateSet; + fromStates = &finStateSetCopy; + } + + /* Unset all of our final states and get the final states from other. */ + if ( !optional ) + fsm->unsetAllFinStates(); + fsm->finStateSet.insert( other->finStateSet ); + + /* Since other's lists are empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Merge our former final states with the start state of other. */ + for ( int i = 0; i < fromStates->length(); i++ ) { + StateAp *state = fromStates->data[i]; + + /* Merge the former final state with other's start state. */ + fsm->mergeStatesLeaving( state, otherStartState ); + + /* If the former final state was not reset final then we must clear + * the state's out trans data. If it got reset final then it gets to + * keep its out trans data. This must be done before fillInStates gets + * called to prevent the data from being sourced. */ + if ( ! state->isFinState() ) + fsm->clearOutData( state ); + } + + /* Fill in any new states made from merging. */ + FsmRes res = fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + res.fsm->afterOpMinimize( lastInSeq ); + + return res; +} + +FsmRes FsmAp::rightStartConcatOp( FsmAp *fsm, FsmAp *other, bool lastInSeq ) +{ + PriorDesc *priorDesc0 = fsm->ctx->allocPriorDesc(); + PriorDesc *priorDesc1 = fsm->ctx->allocPriorDesc(); + + /* Set up the priority descriptors. The left machine gets the + * lower priority where as the right get the higher start priority. */ + priorDesc0->key = fsm->ctx->nextPriorKey++; + priorDesc0->priority = 0; + fsm->allTransPrior( fsm->ctx->curPriorOrd++, priorDesc0 ); + + /* The start transitions of the right machine gets the higher + * priority. Use the same unique key. 
*/ + priorDesc1->key = priorDesc0->key; + priorDesc1->priority = 1; + other->startFsmPrior( fsm->ctx->curPriorOrd++, priorDesc1 ); + + return concatOp( fsm, other, lastInSeq ); +} + +/* Returns union of fsm and other. Other is deleted. The context-wide unionOp + * flag is raised for the duration of the operation and lowered again on every + * exit path, including failure, so a failed union does not leave the flag set + * for later operations on the same context. */ +FsmRes FsmAp::unionOp( FsmAp *fsm, FsmAp *other, bool lastInSeq ) +{ + assert( fsm->ctx == other->ctx ); + + /* Keep the context pointer locally so the flag can be reset without + * going through fsm after a failed fill. */ + FsmCtx *fsmCtx = fsm->ctx; + fsmCtx->unionOp = true; + + fsm->setFinBits( STB_GRAPH1 ); + other->setFinBits( STB_GRAPH2 ); + + /* Turn on misfit accounting for both graphs. */ + fsm->setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Call Worker routine. */ + FsmRes res = doUnion( fsm, other ); + if ( !res.success() ) { + fsmCtx->unionOp = false; + return res; + } + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + fsmCtx->unionOp = false; + fsm->unsetFinBits( STB_BOTH ); + + fsm->afterOpMinimize( lastInSeq ); + + return res; +} + +/* Intersects other with fsm. Other is deleted. Final states of fsm are + * tagged STB_GRAPH1 and those of other STB_GRAPH2 before the union worker + * runs; unsetIncompleteFinals then decides which states stay final, and + * states with no path to a final state are removed. */ +FsmRes FsmAp::intersectOp( FsmAp *fsm, FsmAp *other, bool lastInSeq ) +{ + assert( fsm->ctx == other->ctx ); + + /* Turn on misfit accounting for both graphs. */ + fsm->setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits on fsm and other to want each other. */ + fsm->setFinBits( STB_GRAPH1 ); + other->setFinBits( STB_GRAPH2 ); + + /* Call worker Or routine. */ + FsmRes res = doUnion( fsm, other ); + if ( !res.success() ) + return res; + + /* Unset any final states that are no longer to + * be final due to final bits. */ + fsm->unsetIncompleteFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + fsm->removeDeadEndStates(); + + fsm->afterOpMinimize( lastInSeq ); + + return res; +} + +/* Set subtracts other machine from this machine. Other is deleted. 
*/ +FsmRes FsmAp::subtractOp( FsmAp *fsm, FsmAp *other, bool lastInSeq ) +{ + assert( fsm->ctx == other->ctx ); + + /* Turn on misfit accounting for both graphs. */ + fsm->setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits of other to be killers. */ + other->setFinBits( STB_GRAPH1 ); + + /* Call worker Or routine. */ + FsmRes res = doUnion( fsm, other ); + if ( !res.success() ) + return res; + + /* Unset any final states that are no longer to + * be final due to final bits. */ + fsm->unsetKilledFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + fsm->removeDeadEndStates(); + + fsm->afterOpMinimize( lastInSeq ); + + return res; +} + +FsmRes FsmAp::epsilonOp( FsmAp *fsm ) +{ + fsm->setMisfitAccounting( true ); + + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) + st->owningGraph = 0; + + /* Perform merges. */ + fsm->resolveEpsilonTrans(); + + /* Epsilons can caused merges which leave behind unreachable states. */ + FsmRes res = FsmAp::fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + return res; +} + +/* Make a new maching by joining together a bunch of machines without making + * any transitions between them. A negative finalId results in there being no + * final id. */ +FsmRes FsmAp::joinOp( FsmAp *fsm, int startId, int finalId, FsmAp **others, int numOthers ) +{ + for ( int m = 0; m < numOthers; m++ ) { + assert( fsm->ctx == others[m]->ctx ); + } + + /* Set the owning machines. Start at one. Zero is reserved for the start + * and final states. 
*/ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) + st->owningGraph = 1; + for ( int m = 0; m < numOthers; m++ ) { + for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ ) + st->owningGraph = 2+m; + } + + /* All machines loose start state status. */ + fsm->unsetStartState(); + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + fsm->copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. */ + fsm->stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + fsm->finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } + + /* Look up the start entry point. */ + EntryMapEl *enLow = 0, *enHigh = 0; + bool findRes = fsm->entryPoints.findMulti( startId, enLow, enHigh ); + if ( ! findRes ) { + /* No start state. Set a default one and proceed with the join. Note + * that the result of the join will be a very uninteresting machine. */ + fsm->setStartState( fsm->addState() ); + } + else { + /* There is at least one start state, create a state that will become + * the new start state. */ + StateAp *newStart = fsm->addState(); + fsm->setStartState( newStart ); + + /* The start state is in an owning machine class all it's own. */ + newStart->owningGraph = 0; + + /* Create the set of states to merge from. */ + StateSet stateSet; + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) + stateSet.insert( en->value ); + + /* Merge in the set of start states into the new start state. 
*/ + fsm->mergeStateList( newStart, stateSet.data, stateSet.length() ); + } + + /* Take a copy of the final state set, before unsetting them all. This + * will allow us to call clearOutData on the states that don't get + * final state status back back. */ + StateSet finStateSetCopy = fsm->finStateSet; + + /* Now all final states are unset. */ + fsm->unsetAllFinStates(); + + if ( finalId >= 0 ) { + /* Create the implicit final state. */ + StateAp *finState = fsm->addState(); + fsm->setFinState( finState ); + + /* Assign an entry into the final state on the final state entry id. Note + * that there may already be an entry on this id. That's ok. Also set the + * final state owning machine id. It's in a class all it's own. */ + fsm->setEntry( finalId, finState ); + finState->owningGraph = 0; + } + + /* Hand over to workers for resolving epsilon trans. This will merge states + * with the targets of their epsilon transitions. */ + fsm->resolveEpsilonTrans(); + + /* Invoke the relinquish final callback on any states that did not get + * final state status back. */ + for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) { + if ( !((*st)->stateBits & STB_ISFINAL) ) + fsm->clearOutData( *st ); + } + + /* Fill in any new states made from merging. */ + FsmRes res = FsmAp::fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Joining can be messy. Instead of having misfit accounting on (which is + * tricky here) do a full cleaning. */ + fsm->removeUnreachableStates(); + + return res; +} + +/* Ensure that the start state is free of entry points (aside from the fact + * that it is the start state). If the start state has entry points then Make a + * new start state by merging with the old one. Useful before modifying start + * transitions. If the existing start state has any entry points other than the + * start state entry then modifying its transitions changes more than the start + * transitions. 
So isolate the start state by separating it out such that it + * only has start stateness as it's entry point. */ +FsmRes FsmAp::isolateStartState( FsmAp *fsm ) +{ + /* Do nothing if the start state is already isolated. */ + if ( fsm->isStartStateIsolated() ) + return FsmRes( FsmRes::Fsm(), fsm ); + + /* Turn on misfit accounting to possibly catch the old start state. */ + fsm->setMisfitAccounting( true ); + + /* This will be the new start state. The existing start + * state is merged with it. */ + StateAp *prevStartState = fsm->startState; + fsm->unsetStartState(); + fsm->setStartState( fsm->addState() ); + + /* Merge the new start state with the old one to isolate it. */ + fsm->mergeStates( fsm->startState, prevStartState ); + + /* Stfil and stateDict will be empty because the merging of the old start + * state into the new one will not have any conflicting transitions. */ + assert( fsm->stateDict.treeSize == 0 ); + assert( fsm->nfaList.length() == 0 ); + + /* The old start state may be unreachable. Remove the misfits and turn off + * misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + return FsmRes( FsmRes::Fsm(), fsm ); +} + +StateAp *FsmAp::dupStartState() +{ + StateAp *dup = addState(); + mergeStates( dup, startState ); + return dup; +} + +/* A state merge which represents the drawing in of leaving transitions. If + * there is any out data then we duplicate the source state, transfer the out + * data, then merge in the state. The new state will be reaped because it will + * not be given any in transitions. */ +void FsmAp::mergeStatesLeaving( StateAp *destState, StateAp *srcState ) +{ + if ( !hasOutData( destState ) ) { + /* Perform the merge, indicating we are leaving, which will affect how + * out conds are merged. */ + mergeStates( destState, srcState, true ); + } + else { + /* Dup the source state. 
*/ + StateAp *ssMutable = addState(); + mergeStates( ssMutable, srcState ); + + /* Do out data transfer (and out condition embedding). */ + transferOutData( ssMutable, destState ); + + if ( destState->outCondSpace != 0 ) { + + doEmbedCondition( ssMutable, destState->outCondSpace->condSet, + destState->outCondKeys ); + } + + /* Now we merge with dest, setting leaving = true. This dictates how + * out conditions should be merged. */ + mergeStates( destState, ssMutable, true ); + } +} + +void FsmAp::checkEpsilonRegularInteraction( const PriorTable &t1, const PriorTable &t2 ) +{ + for ( PriorTable::Iter pd1 = t1; pd1.lte(); pd1++ ) { + for ( PriorTable::Iter pd2 = t2; pd2.lte(); pd2++ ) { + /* Looking for unequal guarded priorities with the same key. */ + if ( pd1->desc->key == pd2->desc->key ) { + if ( pd1->desc->priority < pd2->desc->priority || + pd1->desc->priority > pd2->desc->priority ) + { + if ( ctx->checkPriorInteraction && pd1->desc->guarded ) { + if ( ! priorInteraction ) { + priorInteraction = true; + guardId = pd1->desc->guardId; + } + } + } + } + } + } +} + +void FsmAp::mergeStateProperties( StateAp *destState, StateAp *srcState ) +{ + /* Draw in any properties of srcState into destState. */ + if ( srcState == destState ) { + /* Duplicate the list to protect against write to source. The + * priorities sets are not copied in because that would have no + * effect. */ + destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) ); + + /* Get all actions, duplicating to protect against write to source. 
*/ + destState->toStateActionTable.setActions( + ActionTable( srcState->toStateActionTable ) ); + destState->fromStateActionTable.setActions( + ActionTable( srcState->fromStateActionTable ) ); + destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) ); + destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) ); + destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) ); + + /* Not touching guarded-in table or out conditions. Probably should + * leave some of the above alone as well. */ + } + else { + /* Get the epsilons, out priorities. */ + destState->epsilonTrans.append( srcState->epsilonTrans ); + destState->outPriorTable.setPriors( srcState->outPriorTable ); + + /* Get all actions. */ + destState->toStateActionTable.setActions( srcState->toStateActionTable ); + destState->fromStateActionTable.setActions( srcState->fromStateActionTable ); + destState->outActionTable.setActions( srcState->outActionTable ); + destState->errActionTable.setActions( srcState->errActionTable ); + destState->eofActionTable.setActions( srcState->eofActionTable ); + destState->lmNfaParts.insert( srcState->lmNfaParts ); + destState->guardedInTable.setPriors( srcState->guardedInTable ); + } +} + +void FsmAp::mergeStateBits( StateAp *destState, StateAp *srcState ) +{ + /* Get bits and final state status. Note in the above code we depend on the + * original final state status being present. */ + destState->stateBits |= ( srcState->stateBits & ~STB_ISFINAL ); + if ( srcState->isFinState() ) + setFinState( destState ); +} + +void FsmAp::mergeNfaTransitions( StateAp *destState, StateAp *srcState ) +{ + /* Copy in any NFA transitions. 
*/ + if ( srcState->nfaOut != 0 ) { + if ( destState->nfaOut == 0 ) + destState->nfaOut = new NfaTransList; + + for ( NfaTransList::Iter nt = *srcState->nfaOut; nt.lte(); nt++ ) { + NfaTrans *trans = new NfaTrans( + nt->pushTable, nt->restoreTable, + nt->popFrom, nt->popCondSpace, nt->popCondKeys, + nt->popAction, nt->popTest, nt->order ); + + destState->nfaOut->append( trans ); + attachToNfa( destState, nt->toState, trans ); + } + } +} + +void FsmAp::checkPriorInteractions( StateAp *destState, StateAp *srcState ) +{ + /* Run a check on priority interactions between epsilon transitions and + * regular transitions. This can't be used to affect machine construction, + * only to check for priority guards. */ + if ( destState->nfaOut != 0 ) { + for ( NfaTransList::Iter nt = *destState->nfaOut; nt.lte(); nt++ ) { + for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + checkEpsilonRegularInteraction( + trans->tdap()->priorTable, nt->priorTable ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; + cond.lte(); cond++ ) + { + checkEpsilonRegularInteraction( + cond->priorTable, nt->priorTable ); + + } + } + } + } + } +} + +void FsmAp::mergeStates( StateAp *destState, StateAp *srcState, bool leaving ) +{ + /* Transitions. */ + outTransCopy( destState, srcState->outList.head ); + + /* Properties such as out data, to/from actions. */ + mergeStateProperties( destState, srcState ); + + /* Merge out conditions, depends on the operation (leaving or not). */ + mergeOutConds( destState, srcState, leaving ); + + /* State bits, including final state stats. Out conds depnds on this + * happening after. */ + mergeStateBits( destState, srcState ); + + /* Draw in the NFA transitions. */ + mergeNfaTransitions( destState, srcState ); + + /* Hacked in check for priority interactions, allowing detection of some + * bad situations. 
*/ + checkPriorInteractions( destState, srcState ); +} + +void FsmAp::mergeStateList( StateAp *destState, + StateAp **srcStates, int numSrc ) +{ + for ( int s = 0; s < numSrc; s++ ) + mergeStates( destState, srcStates[s] ); +} + +void FsmAp::cleanAbortedFill( StateAp *state ) +{ + /* Iterate the out transitions, deleting them. */ + for ( TransList::Iter n, t = state->outList; t.lte(); ) { + n = t.next(); + if ( t->plain() ) + delete t->tdap(); + else + delete t->tcap(); + t = n; + } + + state->outList.abandon(); + + if ( state->nfaIn != 0 ) { + delete state->nfaIn; + state->nfaIn = 0; + } + + if ( state->nfaOut != 0 ) { + state->nfaOut->empty(); + delete state->nfaOut; + state->nfaOut = 0; + } +} + +void FsmAp::cleanAbortedFill() +{ + while ( nfaList.length() > 0 ) { + StateAp *state = nfaList.head; + + StateSet *stateSet = &state->stateDictEl->stateSet; + //mergeStateList( state, stateSet->data, stateSet->length() ); + + for ( StateSet::Iter s = *stateSet; s.lte(); s++ ) + detachStateDict( state, *s ); + + nfaList.detach( state ); + } + + /* Disassociated state dict elements from states. */ + for ( StateDict::Iter sdi = stateDict; sdi.lte(); sdi++ ) + sdi->targState->stateDictEl = 0; + + /* Delete all the state dict elements. */ + stateDict.empty(); + + /* Delete all the transitions. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + cleanAbortedFill( state ); + + /* Delete all the states. */ + stateList.empty(); + + /* Delete all the transitions. */ + for ( StateList::Iter state = misfitList; state.lte(); state++ ) + cleanAbortedFill( state ); + + /* Delete all the states. 
*/ + misfitList.empty(); +} + +bool FsmAp::overStateLimit() +{ + if ( ctx->stateLimit > FsmCtx::STATE_UNLIMITED ) { + long states = misfitList.length() + stateList.length(); + if ( states > ctx->stateLimit ) + return true; + } + return false; +} + +bool FsmAp::fillAbort( FsmRes &res, FsmAp *fsm ) +{ + if ( fsm->priorInteraction ) { + fsm->cleanAbortedFill(); + int guardId = fsm->guardId; + delete fsm; + res = FsmRes( FsmRes::PriorInteraction(), guardId ); + return true; + } + + if ( fsm->overStateLimit() ) { + fsm->cleanAbortedFill(); + delete fsm; + res = FsmRes( FsmRes::TooManyStates() ); + return true; + } + + return false; +} + +FsmRes FsmAp::fillInStates( FsmAp *fsm ) +{ + /* Used as return value on success. Filled in with error on abort. */ + FsmRes res( FsmRes::Fsm(), fsm ); + + /* Merge any states that are awaiting merging. This will likey cause other + * states to be added to the NFA list. */ + while ( true ) { + if ( fillAbort( res, fsm ) ) + return res; + + if ( fsm->nfaList.length() == 0 ) + break; + + StateAp *state = fsm->nfaList.head; + + StateSet *stateSet = &state->stateDictEl->stateSet; + fsm->mergeStateList( state, stateSet->data, stateSet->length() ); + + for ( StateSet::Iter s = *stateSet; s.lte(); s++ ) + fsm->detachStateDict( state, *s ); + + fsm->nfaList.detach( state ); + } + + /* The NFA list is empty at this point. There are no state sets we need to + * preserve. */ + + /* Disassociated state dict elements from states. */ + for ( StateDict::Iter sdi = fsm->stateDict; sdi.lte(); sdi++ ) + sdi->targState->stateDictEl = 0; + + /* Delete all the state dict elements. */ + fsm->stateDict.empty(); + + return res; +} + +/* Check if a machine defines a single character. This is useful in validating + * ranges and machines to export. */ +bool FsmAp::checkSingleCharMachine() +{ + /* Must have two states. */ + if ( stateList.length() != 2 ) + return false; + /* The start state cannot be final. 
*/ + if ( startState->isFinState() ) + return false; + /* There should be only one final state. */ + if ( finStateSet.length() != 1 ) + return false; + /* The final state cannot have any transitions out. */ + if ( finStateSet[0]->outList.length() != 0 ) + return false; + /* The start state should have only one transition out. */ + if ( startState->outList.length() != 1 ) + return false; + /* The singe transition out of the start state should not be a range. */ + TransAp *startTrans = startState->outList.head; + if ( ctx->keyOps->ne( startTrans->lowKey, startTrans->highKey ) ) + return false; + return true; +} + +FsmRes FsmAp::condCostFromState( FsmAp *fsm, StateAp *state, long depth ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->stateBits & STB_ONLIST ) + return FsmRes( FsmRes::Fsm(), fsm ); + + if ( depth > fsm->ctx->condsCheckDepth ) + return FsmRes( FsmRes::Fsm(), fsm ); + + /* Doing depth first, put state on the list. */ + state->stateBits |= STB_ONLIST; + + /* Recurse on everything ranges. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) { + FsmRes res = condCostFromState( fsm, trans->tdap()->toState, depth + 1 ); + if ( !res.success() ) + return res; + } + } + else { + for ( CondSet::Iter csi = trans->condSpace->condSet; csi.lte(); csi++ ) { + if ( (*csi)->costMark ) + return FsmRes( FsmRes::CondCostTooHigh(), (*csi)->costId ); + } + + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) { + FsmRes res = condCostFromState( fsm, cond->toState, depth + 1 ); + if ( !res.success() ) + return res; + } + } + } + } + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *state->nfaOut; n.lte(); n++ ) { + /* We do not increment depth here since this is an epsilon transition. 
*/ + FsmRes res = condCostFromState( fsm, n->toState, depth ); + if ( !res.success() ) + return res; + } + } + + for ( ActionTable::Iter a = state->fromStateActionTable; a.lte(); a++ ) { + if ( a->value->costMark ) + return FsmRes( FsmRes::CondCostTooHigh(), a->value->costId ); + } + + return FsmRes( FsmRes::Fsm(), fsm ); +} + + +/* Returns either success (using supplied fsm), or some error condition. */ +FsmRes FsmAp::condCostSearch( FsmAp *fsm ) +{ + /* Init on state list flags. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) + st->stateBits &= ~STB_ONLIST; + + FsmRes res = condCostFromState( fsm, fsm->startState, 1 ); + if ( !res.success() ) + delete fsm; + return res; +} + +void FsmAp::condCost( Action *action, long repId ) +{ + action->costMark = true; + action->costId = repId; +} + +/* + * This algorithm assigns a price to each state visit, then adds that to a + * running total. Note that we do not guard against multiple visits to a state, + * since we are estimating runtime cost. + * + * We rely on a character histogram and are looking for a probability of being + * in any given state, given that histogram, simple and very effective. + */ +void FsmAp::breadthFromState( double &total, int &minDepth, double *histogram, + FsmAp *fsm, StateAp *state, long depth, int maxDepth, double stateScore ) +{ + if ( depth > maxDepth ) + return; + + /* Recurse on everything ranges. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + + /* Compute target state score. */ + double span = 0; + for ( int i = trans->lowKey.getVal(); i <= trans->highKey.getVal(); i++ ) + span += histogram[i]; + + double targetStateScore = stateScore * ( span ); + + /* Add to the level. 
*/ + total += targetStateScore; + + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) { + if ( trans->tdap()->toState->isFinState() && ( minDepth < 0 || depth < minDepth ) ) + minDepth = depth; + + breadthFromState( total, minDepth, histogram, fsm, trans->tdap()->toState, + depth + 1, maxDepth, targetStateScore ); + } + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) { + if ( cond->toState->isFinState() && ( minDepth < 0 || depth < minDepth ) ) + minDepth = depth; + + breadthFromState( total, minDepth, histogram, fsm, cond->toState, + depth + 1, maxDepth, targetStateScore ); + } + } + } + } + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *state->nfaOut; n.lte(); n++ ) { + if ( n->toState->isFinState() && ( minDepth < 0 || depth < minDepth ) ) + minDepth = depth; + + /* We do not increment depth here since this is an epsilon transition. */ + breadthFromState( total, minDepth, histogram, fsm, n->toState, depth, maxDepth, stateScore ); + } + } +} + +void FsmAp::breadthFromEntry( double &total, int &minDepth, double *histogram, FsmAp *fsm, StateAp *state ) +{ + long depth = 1; + int maxDepth = 5; + double stateScore = 1.0; + + FsmAp::breadthFromState( total, minDepth, histogram, fsm, state, depth, maxDepth, stateScore ); +} + + +void FsmAp::applyEntryPriorGuard( FsmAp *fsm, long repId ) +{ + PriorDesc *priorDesc0 = fsm->ctx->allocPriorDesc(); + PriorDesc *priorDesc1 = fsm->ctx->allocPriorDesc(); + + priorDesc0->key = fsm->ctx->nextPriorKey; + priorDesc0->priority = 0; + priorDesc0->guarded = true; + priorDesc0->guardId = repId; + priorDesc0->other = priorDesc1; + + priorDesc1->key = fsm->ctx->nextPriorKey; + priorDesc1->priority = 1; + priorDesc1->guarded = true; + priorDesc1->guardId = repId; + priorDesc1->other = priorDesc0; + + /* Roll over for next allocation. */ + fsm->ctx->nextPriorKey += 1; + + /* Only need to set the first. Second is referenced using 'other' field. 
*/ + fsm->startState->guardedInTable.setPrior( 0, priorDesc0 ); +} + +void FsmAp::applyRepeatPriorGuard( FsmAp *fsm, long repId ) +{ + PriorDesc *priorDesc2 = fsm->ctx->allocPriorDesc(); + PriorDesc *priorDesc3 = fsm->ctx->allocPriorDesc(); + + priorDesc2->key = fsm->ctx->nextPriorKey; + priorDesc2->priority = 0; + priorDesc2->guarded = true; + priorDesc2->guardId = repId; + priorDesc2->other = priorDesc3; + + priorDesc3->key = fsm->ctx->nextPriorKey; + priorDesc3->guarded = true; + priorDesc3->priority = 1; + priorDesc3->guardId = repId; + priorDesc3->other = priorDesc2; + + /* Roll over for next allocation. */ + fsm->ctx->nextPriorKey += 1; + + /* Only need to set the first. Second is referenced using 'other' field. */ + fsm->startState->guardedInTable.setPrior( 0, priorDesc2 ); + + fsm->allTransPrior( fsm->ctx->curPriorOrd++, priorDesc3 ); + fsm->leaveFsmPrior( fsm->ctx->curPriorOrd++, priorDesc2 ); +} + +FsmRes FsmAp::condPlus( FsmAp *fsm, long repId, Action *ini, Action *inc, Action *min, Action *max ) +{ + condCost( ini, repId ); + condCost( inc, repId ); + condCost( min, repId ); + if ( max != 0 ) + condCost( max, repId ); + + fsm->startFsmAction( 0, inc ); + + if ( max != 0 ) { + FsmRes res = fsm->startFsmCondition( max, true ); + if ( !res.success() ) + return res; + } + + /* Need a duplicated for the star end. */ + FsmAp *dup = new FsmAp( *fsm ); + + applyRepeatPriorGuard( dup, repId ); + + /* Star the duplicate. */ + FsmRes dupStar = FsmAp::starOp( dup ); + if ( !dupStar.success() ) { + delete fsm; + return dupStar; + } + + FsmRes res = FsmAp::concatOp( fsm, dupStar.fsm ); + if ( !res.success() ) + return res; + + /* End plus operation. */ + + res.fsm->leaveFsmCondition( min, true ); + + /* Init action. */ + res.fsm->startFromStateAction( 0, ini ); + + /* Leading priority guard. 
*/ + applyEntryPriorGuard( res.fsm, repId ); + + return res; +} + +FsmRes FsmAp::condStar( FsmAp *fsm, long repId, Action *ini, Action *inc, Action *min, Action *max ) +{ + condCost( ini, repId ); + condCost( inc, repId ); + condCost( min, repId ); + if ( max != 0 ) + condCost( max, repId ); + + /* Increment. */ + fsm->startFsmAction( 0, inc ); + + /* Max (optional). */ + if ( max != 0 ) { + FsmRes res = fsm->startFsmCondition( max, true ); + if ( !res.success() ) + return res; + } + + applyRepeatPriorGuard( fsm, repId ); + + /* Star. */ + FsmRes res = FsmAp::starOp( fsm ); + if ( !res.success() ) + return res; + + /* Restrict leaving. */ + res.fsm->leaveFsmCondition( min, true ); + + /* Init action. */ + res.fsm->startFromStateAction( 0, ini ); + + /* Leading priority guard. */ + applyEntryPriorGuard( res.fsm, repId ); + + return res; +} + +/* Remove duplicates of unique actions from an action table. */ +void FsmAp::removeDups( ActionTable &table ) +{ + /* Scan through the table looking for unique actions to + * remove duplicates of. */ + for ( int i = 0; i < table.length(); i++ ) { + /* Remove any duplicates ahead of i. */ + for ( int r = i+1; r < table.length(); ) { + if ( table[r].value == table[i].value ) + table.vremove(r); + else + r += 1; + } + } +} + +/* Remove duplicates from action lists. This operates only on transition and + * eof action lists and so should be called once all actions have been + * transfered to their final resting place. */ +void FsmAp::removeActionDups() +{ + /* Loop all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Loop all transitions. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) + removeDups( trans->tdap()->actionTable ); + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) + removeDups( cond->actionTable ); + } + } + removeDups( state->toStateActionTable ); + removeDups( state->fromStateActionTable ); + removeDups( state->eofActionTable ); + } +} + diff --git a/libfsm/fsmgraph.h b/libfsm/fsmgraph.h new file mode 100644 index 00000000..2429d923 --- /dev/null +++ b/libfsm/fsmgraph.h @@ -0,0 +1,2541 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _FSMGRAPH_H +#define _FSMGRAPH_H + +#include "config.h" +#include "ragel.h" +#include "common.h" +#include "vector.h" +#include "bstset.h" +#include "compare.h" +#include "avltree.h" +#include "dlist.h" +#include "dlistmel.h" +#include "bstmap.h" +#include "sbstmap.h" +#include "sbstset.h" +#include "sbsttable.h" +#include "avlset.h" +#include "avlmap.h" + +#include <assert.h> +#include <iostream> +#include <sstream> +#include <string> + + +/* Flags that control merging. */ +#define STB_GRAPH1 0x01 +#define STB_GRAPH2 0x02 +#define STB_BOTH 0x03 +#define STB_ISFINAL 0x04 +#define STB_ISMARKED 0x08 +#define STB_ONLIST 0x10 +#define STB_NFA_REP 0x20 + +using std::ostream; + +struct TransAp; +struct StateAp; +struct FsmAp; +struct Action; +struct LongestMatchPart; +struct LengthDef; +struct CondSpace; +struct FsmCtx; +struct InlineBlock; +struct InlineList; + +struct TooManyStates {}; + +struct PriorInteraction +{ + PriorInteraction( long long id ) : id(id) {} + long long id; +}; + +struct NfaRound +{ + NfaRound( long depth, long groups ) + : depth(depth), groups(groups) {} + + long depth; + long groups; +}; + +typedef Vector<NfaRound> NfaRoundVect; + +struct CondCostTooHigh +{ + CondCostTooHigh( long long costId ) + : costId(costId) {} + + long long costId; +}; + + +/* State list element for unambiguous access to list element. */ +struct FsmListEl +{ + StateAp *prev, *next; +}; + +/* This is the marked index for a state pair. Used in minimization. It keeps + * track of whether or not the state pair is marked. */ +struct MarkIndex +{ + MarkIndex(int states); + ~MarkIndex(); + + void markPair(int state1, int state2); + bool isPairMarked(int state1, int state2); + +private: + int numStates; + bool *array; +}; + +/* Transistion Action Element. */ +typedef SBstMapEl< int, Action* > ActionTableEl; + +/* Nodes in the tree that use this action. 
*/ +struct NameInst; +struct InlineList; +typedef Vector<NameInst*> NameInstVect; + +struct ActionParam +{ + ActionParam( std::string name ) + : name(name) {} + + std::string name; +}; + +typedef Vector<ActionParam*> ActionParamList; + +typedef Vector<Action*> ActionArgList; + +struct CmpActionArgList +{ + static inline int compare( const ActionArgList *list1, const ActionArgList *list2 ) + { + return CmpTable<Action*>::compare( *list1, *list2 ); + } +}; + +typedef BstMap<ActionArgList*, Action*, CmpActionArgList> ActionArgListMap; +typedef BstMapEl<ActionArgList*, Action*> ActionArgListMapEl; + +/* Element in list of actions. Contains the string for the code to exectute. */ +struct Action +: + public DListEl<Action>, + public AvlTreeEl<Action> +{ +public: + + Action( const InputLoc &loc, std::string name, InlineList *inlineList, int condId ) + : + loc(loc), + name(name), + inlineList(inlineList), + actionId(-1), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + numCondRefs(0), + numNfaRefs(0), + anyCall(false), + isLmAction(false), + condId(condId), + costMark(false), + costId(0), + paramList(0), + argListMap(0), + substOf(0), + argList(0) + { + } + + ~Action(); + + static Action *cons( const InputLoc &loc, Action *substOf, + ActionArgList *argList, int condId ) + { + Action *action = new Action( loc, std::string(), 0, condId ); + action->substOf = substOf; + action->argList = argList; + action->inlineList = substOf->inlineList; + return action; + } + + /* Key for action dictionary. */ + std::string getKey() const { return name; } + + /* Data collected during parse. */ + InputLoc loc; + std::string name; + InlineList *inlineList; + int actionId; + + void actionName( ostream &out ) + { + if ( name.empty() ) + out << loc.line << ":" << loc.col; + else + out << name; + } + + /* Nodes in the name tree where the action is embedded. This serves as the + * root for name searches. 
Since actions can be used multiple times we use + * a vector. Name resolver deals with contracts. */ + NameInstVect embedRoots; + + /* Number of references in the final machine. */ + int numRefs() + { + return numTransRefs + numToStateRefs + + numFromStateRefs + numEofRefs + + numNfaRefs; + } + + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + int numCondRefs; + int numNfaRefs; + bool anyCall; + + bool isLmAction; + int condId; + + bool costMark; + long long costId; + + ActionParamList *paramList; + ActionArgListMap *argListMap; + Action *substOf; + ActionArgList *argList; +}; + +struct CmpCondId +{ + static inline int compare( const Action *cond1, const Action *cond2 ) + { + if ( cond1->condId < cond2->condId ) + return -1; + else if ( cond1->condId > cond2->condId ) + return 1; + return 0; + } +}; + +/* A list of actions. */ +typedef DList<Action> ActionList; +typedef AvlTree<Action, std::string, CmpString> ActionDict; + +/* Structure for reverse action mapping. */ +struct RevActionMapEl +{ + char *name; + InputLoc location; +}; + + +/* Transition Action Table. */ +struct ActionTable + : public SBstMap< int, Action*, CmpOrd<int> > +{ + void setAction( int ordering, Action *action ); + void setActions( int *orderings, Action **actions, int nActs ); + void setActions( const ActionTable &other ); + + bool hasAction( Action *action ); +}; + +typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet; +typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet; + +/* Transistion Action Element. */ +typedef SBstMapEl< int, LongestMatchPart* > LmActionTableEl; + +/* Transition Action Table. */ +struct LmActionTable + : public SBstMap< int, LongestMatchPart*, CmpOrd<int> > +{ + void setAction( int ordering, LongestMatchPart *action ); + void setActions( const LmActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). 
*/ +struct CmpActionTableEl +{ + static int compare( const ActionTableEl &action1, + const ActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for ActionTable. */ +typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable; + +/* Compare of a whole lm action table element (key & value). */ +struct CmpLmActionTableEl +{ + static int compare( const LmActionTableEl &lmAction1, + const LmActionTableEl &lmAction2 ) + { + if ( lmAction1.key < lmAction2.key ) + return -1; + else if ( lmAction1.key > lmAction2.key ) + return 1; + else if ( lmAction1.value < lmAction2.value ) + return -1; + else if ( lmAction1.value > lmAction2.value ) + return 1; + return 0; + } +}; + +/* Compare for ActionTable. */ +typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable; + +/* Action table element for error action tables. Adds the encoding of transfer + * point. */ +struct ErrActionTableEl +{ + ErrActionTableEl( Action *action, int ordering, int transferPoint ) + : ordering(ordering), action(action), transferPoint(transferPoint) { } + + /* Ordering and id of the action embedding. */ + int ordering; + Action *action; + + /* Id of point of transfere from Error action table to transtions and + * eofActionTable. */ + int transferPoint; + + int getKey() const { return ordering; } +}; + +struct ErrActionTable + : public SBstTable< ErrActionTableEl, int, CmpOrd<int> > +{ + void setAction( int ordering, Action *action, int transferPoint ); + void setActions( const ErrActionTable &other ); +}; + +/* Compare of an error action table element (key & value). 
*/ +struct CmpErrActionTableEl +{ + static int compare( const ErrActionTableEl &action1, + const ErrActionTableEl &action2 ) + { + if ( action1.ordering < action2.ordering ) + return -1; + else if ( action1.ordering > action2.ordering ) + return 1; + else if ( action1.action < action2.action ) + return -1; + else if ( action1.action > action2.action ) + return 1; + else if ( action1.transferPoint < action2.transferPoint ) + return -1; + else if ( action1.transferPoint > action2.transferPoint ) + return 1; + return 0; + } +}; + +/* Compare for ErrActionTable. */ +typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable; + + +/* Descibe a priority, shared among PriorEls. + * Has key and whether or not used. */ +struct PriorDesc +{ + PriorDesc() + : + key(0), + priority(0), + guarded(false), + guardId(0), + other(0) + {} + + int key; + int priority; + bool guarded; + long long guardId; + PriorDesc *other; + + PriorDesc *prev, *next; +}; + +typedef DList<PriorDesc> PriorDescList; + +/* Element in the arrays of priorities for transitions and arrays. Ordering is + * unique among instantiations of machines, desc is shared. */ +struct PriorEl +{ + PriorEl( int ordering, PriorDesc *desc ) + : ordering(ordering), desc(desc) { } + + int ordering; + PriorDesc *desc; +}; + +/* Compare priority elements, which are ordered by the priority descriptor + * key. */ +struct PriorElCmp +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc->key < pel2.desc->key ) + return -1; + else if ( pel1.desc->key > pel2.desc->key ) + return 1; + else + return 0; + } +}; + + +/* Priority Table. */ +struct PriorTable + : public SBstSet< PriorEl, PriorElCmp > +{ + void setPrior( int ordering, PriorDesc *desc ); + void setPriors( const PriorTable &other ); +}; + +/* Compare of prior table elements for distinguising state data. 
*/ +struct CmpPriorEl +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc < pel2.desc ) + return -1; + else if ( pel1.desc > pel2.desc ) + return 1; + else if ( pel1.ordering < pel2.ordering ) + return -1; + else if ( pel1.ordering > pel2.ordering ) + return 1; + return 0; + } +}; + +/* Compare of PriorTable distinguising state data. Using a compare of the + * pointers is a little more strict than it needs be. It requires that + * prioritiy tables have the exact same set of priority assignment operators + * (from the input lang) to be considered equal. + * + * Really only key-value pairs need be tested and ordering be merged. However + * this would require that in the fuseing of states, priority descriptors be + * chosen for the new fused state based on priority. Since the out transition + * lists and ranges aren't necessarily going to line up, this is more work for + * little gain. Final compression resets all priorities first, so this would + * only be useful for compression at every operator, which is only an + * undocumented test feature. + */ +typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable; + +/* Plain action list that imposes no ordering. */ +typedef Vector<int> TransFuncList; + +/* Comparison for TransFuncList. */ +typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare; + +/* In transition list. Like DList except only has head pointers, which is all + * that is required. Insertion and deletion is handled by the graph. This class + * provides the iterator of a single list. */ +template <class Element> struct InList +{ + InList() : head(0) { } + + Element *head; + + struct Iter + { + /* Default construct. */ + Iter() : ptr(0) { } + + /* Construct, assign from a list. 
*/ + Iter( const InList &il ) : ptr(il.head) { } + Iter &operator=( const InList &dl ) { ptr = dl.head; return *this; } + + /* lte() holds while the iterator points at an element; end() holds once + * it is null. */ + bool lte() const { return ptr != 0; } + bool end() const { return ptr == 0; } + + /* At the first, last element. Both are false for a null iterator. */ + bool first() const { return ptr && ptr->ilprev == 0; } + bool last() const { return ptr && ptr->ilnext == 0; } + + /* Cast, dereference, arrow ops. */ + operator Element*() const { return ptr; } + Element &operator *() const { return *ptr; } + Element *operator->() const { return ptr; } + + /* Increment, decrement. These follow ilnext/ilprev without a null + * check, so the iterator must point at an element. */ + inline void operator++(int) { ptr = ptr->ilnext; } + inline void operator--(int) { ptr = ptr->ilprev; } + + /* The iterator is simply a pointer. */ + Element *ptr; + }; +}; + +struct TransData +{ /* End-point states plus the action, priority and longest-match action tables. */ + TransData() + : + fromState(0), toState(0) + {} + + TransData( const TransData &other ) /* Copies the three tables; fromState and toState restart at null. */ + : + fromState(0), toState(0), + actionTable(other.actionTable), + priorTable(other.priorTable), + lmActionTable(other.lmActionTable) + { + } + + StateAp *fromState; + StateAp *toState; + + /* The function table and priority for the transition. */ + ActionTable actionTable; + PriorTable priorTable; + + LmActionTable lmActionTable; +}; + + +/* The element for the sub-list within a TransAp. These specify the transitions + * and are keyed by the condition expressions. */ +struct CondAp + : public TransData +{ + CondAp( TransAp *transAp ) + : + TransData(), + transAp(transAp), + key(0) + {} + + CondAp( const CondAp &other, TransAp *transAp ) /* Copies data and key from other, but takes the owner from the argument. */ + : + TransData( other ), + transAp(transAp), + key(other.key) + { + } + + /* Owning transition. */ + TransAp *transAp; + + CondKey key; + + /* Pointers for outlist. Not set by the constructors. */ + CondAp *prev, *next; + + /* Pointers for in-list. Not set by the constructors. */ + CondAp *ilprev, *ilnext; +}; + +typedef DList<CondAp> CondList; + +struct TransCondAp; +struct TransDataAp; + +/* Transition class that implements actions and priorities. 
*/ +struct TransAp +{ + TransAp() + : condSpace(0) {} + + TransAp( const TransAp &other ) /* Copies the key range and condSpace; prev/next are left unset. */ + : + lowKey(other.lowKey), + highKey(other.highKey), + condSpace(other.condSpace) + { + } + + ~TransAp() /* NOTE(review): not virtual, so deleting a TransCondAp through a TransAp* would skip ~TransCondAp -- confirm callers downcast via tcap()/tdap() first. */ + { + // delete condList.head; + // condList.abandon(); + } + + bool plain() const + { return condSpace == 0; } + + TransCondAp *tcap(); + TransDataAp *tdap(); + + long condFullSize(); + + Key lowKey, highKey; + + /* Which conditions are tested on this range. Null for a plain transition. */ + CondSpace *condSpace; + + /* Pointers for outlist. */ + TransAp *prev, *next; +}; + +struct TransCondAp + : public TransAp +{ + TransCondAp() + : + TransAp() + {} + + TransCondAp( const TransCondAp &other ) /* The copy starts with an empty condList; other's list is not duplicated here. */ + : + TransAp( other ), + condList() + {} + + ~TransCondAp() + { + condList.empty(); + } + + /* Cond trans list. Sorted by key value. */ + CondList condList; +}; + +struct TransDataAp + : public TransAp, public TransData +{ + TransDataAp() + : + TransAp(), + TransData() + {} + + TransDataAp( const TransDataAp &other ) + : + TransAp( other ), + TransData( other ) + {} + + /* Pointers for in-list. */ + TransDataAp *ilprev, *ilnext; +}; + +inline TransCondAp *TransAp::tcap() /* Downcast; yields null when condSpace is null. */ + { return this->condSpace != 0 ? static_cast<TransCondAp*>( this ) : 0; } + +inline TransDataAp *TransAp::tdap() /* Downcast; yields null when condSpace is set. */ + { return this->condSpace == 0 ? static_cast<TransDataAp*>( this ) : 0; } + +typedef DList<TransAp> TransList; + +/* Need the base vector type for accessing underlying remove function. */ +typedef BstSet<int> CondKeySet; +typedef Vector<int> CondKeyVect; + +/* State class that implements actions and priorities. 
*/ + +struct NfaActions +{ + NfaActions( Action *push, Action *pop, int order ) + : push(push), pop(pop), order(order) {} + + Action *push; + Action *pop; + + int order; + + ActionTable pushTable; + ActionTable popTable; +}; + +struct NfaTrans +{ + NfaTrans( int order ) /* All tables start empty; popCondSpace and the state pointers start null. */ + : + fromState(0), + toState(0), + order(order), + popCondSpace(0) + { + } + + NfaTrans( const ActionTable &pushTable, + const ActionTable &restoreTable, + const ActionTable &popFrom, + CondSpace *popCondSpace, + const CondKeySet popCondKeys, /* Taken by value, unlike the table arguments. */ + const ActionTable &popAction, + const ActionTable &popTable, /* Stored in the popTest member. */ + int order ) + : + fromState(0), toState(0), + order(order), + pushTable(pushTable), + restoreTable(restoreTable), + popFrom(popFrom), + popCondSpace(popCondSpace), + popCondKeys(popCondKeys), + popAction(popAction), + popTest(popTable) + {} + + NfaTrans( const NfaTrans &other ) /* NOTE(review): popFrom is the one table not copied here, so the copy gets an empty popFrom -- confirm this is intended. */ + : + fromState(0), toState(0), + order(other.order), + pushTable(other.pushTable), + restoreTable(other.restoreTable), + popCondSpace(other.popCondSpace), + popCondKeys(other.popCondKeys), + popAction(other.popAction), + popTest(other.popTest), + priorTable(other.priorTable) + {} + + + StateAp *fromState; + StateAp *toState; + + int order; + + ActionTable pushTable; + ActionTable restoreTable; + + /* + * 1. Conditions transferred (always tested first) + * 2. Actions transferred + * 3. Pop actions created during epsilon draw. + */ + ActionTable popFrom; + CondSpace *popCondSpace; + CondKeySet popCondKeys; + + ActionTable popAction; + ActionTable popTest; + + PriorTable priorTable; + + NfaTrans *prev, *next; + NfaTrans *ilprev, *ilnext; +}; + + +typedef BstMap<StateAp*, NfaActions> NfaStateMap; +typedef BstMapEl<StateAp*, NfaActions> NfaStateMapEl; + +typedef DList<NfaTrans> NfaTransList; +typedef InList<NfaTrans> NfaInList; + +struct CmpNfaTrans +{ + static int compare( NfaTrans *t1, NfaTrans *t2 ) + { + /* This comparison is too strong. (okay to use something too strong -- + * we just don't find minimal). 
* */ + if ( t1->toState < t2->toState ) + return -1; + else if ( t1->toState > t2->toState ) + return 1; + else if ( t1->order < t2->order ) + return -1; + else if ( t1->order > t2->order ) + return 1; + else + { /* Same target and order: compare the tables. popFrom and priorTable are not compared. */ + int r = CmpActionTable::compare( t1->pushTable, t2->pushTable ); + if ( r != 0 ) + return r; + + r = CmpActionTable::compare( t1->restoreTable, t2->restoreTable ); + if ( r != 0 ) + return r; + + if ( t1->popCondSpace < t2->popCondSpace ) + return -1; + else if ( t1->popCondSpace > t2->popCondSpace ) + return 1; + + r = CmpTable<int>::compare( t1->popCondKeys, t2->popCondKeys ); + if ( r != 0 ) + return r; + + r = CmpActionTable::compare( t1->popTest, t2->popTest ); + if ( r != 0 ) + return r; + + r = CmpActionTable::compare( t1->popAction, t2->popAction ); + if ( r != 0 ) + return r; + } + + return 0; + } +}; + +struct CmpNfaTransList +{ + static int compare( const NfaTransList &l1, const NfaTransList &l2 ) + { /* Shorter lists order first; equal-length lists are compared element by element. */ + if ( l1.length() < l2.length() ) + return -1; + else if ( l1.length() > l2.length() ) + return 1; + else { + NfaTransList::Iter i1 = l1; + NfaTransList::Iter i2 = l2; + while ( i1.lte() ) { /* Lengths are equal here, so testing i1 alone covers i2. */ + int r = CmpNfaTrans::compare( i1, i2 ); + if ( r != 0 ) + return r; + i1++, i2++; + } + } + return 0; + } +}; + +struct CmpNfaStateMapEl +{ + static int compare( const NfaStateMapEl &el1, const NfaStateMapEl &el2 ) + { /* Ordered by key, then value.push, value.pop and value.order. */ + if ( el1.key < el2.key ) + return -1; + else if ( el1.key > el2.key ) + return 1; + else if ( el1.value.push < el2.value.push ) + return -1; + else if ( el1.value.push > el2.value.push ) + return 1; + else if ( el1.value.pop < el2.value.pop ) + return -1; + else if ( el1.value.pop > el2.value.pop ) + return 1; + else if ( el1.value.order < el2.value.order ) + return -1; + else if ( el1.value.order > el2.value.order ) + return 1; + return 0; + } +}; + +/* Set of states, list of states. */ +typedef BstSet<StateAp*> StateSet; +typedef DList<StateAp> StateList; + +/* An element in a state dict. 
*/ +struct StateDictEl +: + public AvlTreeEl<StateDictEl> +{ + /* Copies the key set. The target state starts out null rather than + * indeterminate. */ + StateDictEl(const StateSet &stateSet) + : stateSet(stateSet), targState(0) { } + + const StateSet &getKey() { return stateSet; } + StateSet stateSet; + StateAp *targState; +}; + +/* Dictionary mapping a set of states to a target state. */ +typedef AvlTree< StateDictEl, StateSet, CmpTable<StateAp*> > StateDict; + +/* A key range together with a transition pointer. */ +struct TransEl +{ + /* Constructors. Every form initializes value, so an element built without + * a transition holds null instead of an indeterminate pointer. */ + TransEl() : value(0) { } + TransEl( Key lowKey, Key highKey ) + : lowKey(lowKey), highKey(highKey), value(0) { } + TransEl( Key lowKey, Key highKey, TransAp *value ) + : lowKey(lowKey), highKey(highKey), value(value) { } + + Key lowKey, highKey; + TransAp *value; +}; + +/* Three-way key compare that defers to a KeyOps object. keyOps starts out + * null and must be set before compare is called. */ +struct CmpKey +{ + CmpKey() + : keyOps(0) {} + + KeyOps *keyOps; + + int compare( const Key key1, const Key key2 ) + { + if ( keyOps->lt( key1, key2 ) ) + return -1; + else if ( keyOps->gt( key1, key2 ) ) + return 1; + else + return 0; + } +}; + +/* Vector based set of key items. */ +struct KeySet +: + public BstSet<Key, CmpKey> +{ + /* Stores keyOps in the inherited CmpKey so that compare can use it. */ + KeySet( KeyOps *keyOps ) + { + CmpKey::keyOps = keyOps; + } +}; + +struct MinPartition +{ + MinPartition() : active(false) { } + + StateList list; + bool active; + + MinPartition *prev, *next; +}; + +/* Epsilon transition stored in a state. Specifies the target */ +typedef Vector<int> EpsilonTrans; + +/* List of states that are to be drawn into this. */ +struct EptVectEl +{ + EptVectEl( StateAp *targ, bool leaving ) + : targ(targ), leaving(leaving) { } + + StateAp *targ; + bool leaving; +}; +typedef Vector<EptVectEl> EptVect; + +/* Set of entry ids that go into this state. */ +typedef BstSet<int> EntryIdSet; + +/* Set of longest match items that may be active in a given state. */ +typedef BstSet<LongestMatchPart*> LmItemSet; + +/* A condition which is to be + * transferred on pending out transitions. 
*/ +struct OutCond +{ + OutCond( Action *action, bool sense ) + : action(action), sense(sense) {} + + Action *action; + bool sense; +}; + +struct CmpOutCond +{ + static int compare( const OutCond &outCond1, const OutCond &outCond2 ) + { + if ( outCond1.action < outCond2.action ) + return -1; + else if ( outCond1.action > outCond2.action ) + return 1; + else if ( outCond1.sense < outCond2.sense ) + return -1; + else if ( outCond1.sense > outCond2.sense ) + return 1; + return 0; + } +}; + +/* Conditions. */ +typedef BstSet< Action*, CmpCondId > CondSet; +typedef CmpTable< Action*, CmpCondId > CmpCondSet; + +struct CondSpace + : public AvlTreeEl<CondSpace> +{ + CondSpace( const CondSet &condSet ) + : condSet(condSet) {} + + const CondSet &getKey() { return condSet; } + + long fullSize() + { return ( 1 << condSet.length() ); } + + CondSet condSet; + long condSpaceId; +}; + +typedef Vector<CondSpace*> CondSpaceVect; + +typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap; + +typedef Vector<long> LongVect; + +struct CondData +{ + CondSpaceMap condSpaceMap; + + ~CondData() + { + condSpaceMap.empty(); + } +}; + +struct FsmGbl +{ + FsmGbl( const HostLang *hostLang ) + : + printStatistics(false), + errorCount(0), + displayPrintables(false), + hostLang(hostLang), + stringTables(false), + checkPriorInteraction(0), + wantDupsRemoved(true), + minimizeLevel(MinimizePartition2), + minimizeOpt(MinimizeMostOps) + {} + + bool printStatistics; + + /* + * Error reporting. + */ + + /* PROGNAME: txt */ + std::ostream &error(); + + /* file:loc: txt */ + std::ostream &error( const InputLoc &loc ); + + /* txt */ + std::ostream &error_plain(); + + /* file:loc: warning: txt */ + std::ostream &warning( const InputLoc &loc ); + + /* Stats reporting. */ + std::ostream &stats(); + + /* Requested info. 
*/ + std::ostream &info(); + + std::stringstream libcerr; + std::stringstream libcout; + + int errorCount; + void abortCompile( int code ); + bool displayPrintables; + + const HostLang *hostLang; + bool stringTables; + bool checkPriorInteraction; + bool wantDupsRemoved; + + MinimizeLevel minimizeLevel; + MinimizeOpt minimizeOpt; +}; + +/* All FSM operations must be between machines that have been created using the + * same context object. */ +struct FsmCtx +{ + FsmCtx( FsmGbl *fsmGbl ); + ~FsmCtx(); + + KeyOps *keyOps; + CondData *condData; + MinimizeLevel minimizeLevel; + MinimizeOpt minimizeOpt; + + static const int STATE_UNLIMITED = 0; /* Presumably the stateLimit value meaning no limit -- confirm against users of stateLimit. */ + + long stateLimit; + bool printStatistics; + bool checkPriorInteraction; + + bool unionOp; + + long condsCheckDepth; + + /* Counting the action and priority ordering. */ + int curActionOrd; + int curPriorOrd; + + int nextPriorKey; + int nextCondId; + + PriorDesc *allocPriorDesc() /* Allocates a PriorDesc with new and appends it to priorDescList. */ + { + PriorDesc *priorDesc = new PriorDesc(); + priorDescList.append( priorDesc ); + return priorDesc; + } + + PriorDescList priorDescList; + + FsmGbl *fsmGbl; + + /* List of actions. Will be pasted into a switch statement. */ + ActionList actionList; + + ExportList exportList; + + bool generatingSectionSubset; + bool lmRequiresErrorState; + + /* Map name ids to name inst pointers. */ + NameInst **nameIndex; + + /* Element type and get key expression. */ + InlineList *getKeyExpr; + InlineList *accessExpr; + + /* Stack management */ + InlineBlock *prePushExpr; + InlineBlock *postPopExpr; + + /* Nfa stack management. */ + InlineBlock *nfaPrePushExpr; + InlineBlock *nfaPostPopExpr; + + /* Overriding variables. 
*/ + InlineList *pExpr; + InlineList *peExpr; + InlineList *eofExpr; + InlineList *csExpr; + InlineList *topExpr; + InlineList *stackExpr; + InlineList *actExpr; + InlineList *tokstartExpr; + InlineList *tokendExpr; + InlineList *dataExpr; + + Action *newNfaWrapAction( const char *name, InlineList *inlineList, Action *optWrap ); + void createNfaActions( FsmAp *fsm ); + + /* Checking the contents of actions. */ + void checkAction( Action *action ); + void checkInlineList( Action *act, InlineList *inlineList ); + + void analyzeAction( Action *action, InlineList *inlineList ); + void analyzeGraph( FsmAp *graph ); + + void finalizeInstance( FsmAp *graph ); + void prepareReduction( FsmAp *sectionGraph ); +}; + +typedef InList<CondAp> CondInList; +typedef InList<TransDataAp> TransInList; + +struct NfaStateEl +{ /* Link pointers for NfaStateList, separate from StateAp's own next/prev members. */ + StateAp *prev, *next; +}; + +typedef DListMel<StateAp, NfaStateEl> NfaStateList; + +struct StateAp + : public NfaStateEl +{ + StateAp(); + StateAp(const StateAp &other); + ~StateAp(); + + /* Is the state final? Tests the STB_ISFINAL bit of stateBits. */ + bool isFinState() { return stateBits & STB_ISFINAL; } + + /* Out transition list. */ + TransList outList; + + /* In transition Lists. */ + TransInList inTrans; + CondInList inCond; + + /* Set only during scanner construction when actions are added. NFA to DFA + * code can ignore this. */ + StateAp *eofTarget; + + /* Entry points into the state. */ + EntryIdSet entryIds; + + /* Epsilon transitions. */ + EpsilonTrans epsilonTrans; + + /* Number of in transitions from states other than ourselves. */ + int foreignInTrans; + + /* Temporary data for various algorithms. The members share storage, so + * only one of them is meaningful at a time. */ + union { + /* When duplicating the fsm we need to map each + * state to the new state representing it. */ + StateAp *stateMap; + + /* When minimizing machines by partitioning, this maps to the group + * the state is in. */ + MinPartition *partition; + + /* Identification for printing and stable minimization. 
*/ + int stateNum; + + } alg; + + /* Data used in epsilon operation, maybe fit into alg? */ + StateAp *isolatedShadow; + int owningGraph; + + /* A pointer to a dict element that contains the set of states this state + * represents. This cannot go into alg, because alg.next is used during + * the merging process. + * NOTE(review): the alg union has no member named next (only stateMap, + * partition and stateNum), so the sentence above may be stale. */ + StateDictEl *stateDictEl; + StateSet *stateDictIn; + + NfaTransList *nfaOut; + NfaInList *nfaIn; + + /* When drawing epsilon transitions, holds the list of states to merge + * with. */ + EptVect *eptVect; + + /* Bits controlling the behaviour of the state during collapsing to dfa. */ + int stateBits; + + /* State list elements. */ + StateAp *next, *prev; + + /* + * Priority and Action data. + */ + + /* Out priorities transferred to out transitions. */ + PriorTable outPriorTable; + + /* The following two action tables are distinguished by when they run. + * The toState actions are executed immediately after transition actions of + * incoming transitions and the current character will be the same as the + * one available then. The fromState actions are executed immediately + * before the transition actions of outgoing transitions and the current + * character is same as the one available then. */ + + /* Actions to execute upon entering into a state. */ + ActionTable toStateActionTable; + + /* Actions to execute when going from the state to the transition. */ + ActionTable fromStateActionTable; + + /* Actions to add to any future transitions that leave via this state. */ + ActionTable outActionTable; + + /* Conditions to add to any future transitions that leave via this state. */ + CondSpace *outCondSpace; + CondKeySet outCondKeys; + + /* Error action tables. */ + ErrActionTable errActionTable; + + /* Actions to execute on eof. */ + ActionTable eofActionTable; + + /* Set of longest match items that may be active in this state. */ + LmItemSet lmItemSet; + + PriorTable guardedInTable; + + /* Used by the NFA-based scanner to track the origin of final states. 
We + * only use it in cases where just one match is possible, starting with the + * final state duplicates that are drawn using NFA transitions. */ + LmItemSet lmNfaParts; +}; + +/* Return and re-entry for the co-routine iterators. This should ALWAYS be + * used inside of a block. Records label in itState, returns, and defines the + * entry##label label that the next call jumps back to. */ +#define CO_RETURN(label) \ + itState = label; \ + return; \ + entry##label: {} + +/* Return and re-entry for the co-routine iterators. This should ALWAYS be + * used inside of a block. Same as CO_RETURN, but also stores uState in + * userState before returning. */ +#define CO_RETURN2(label, uState) \ + itState = label; \ + userState = uState; \ + return; \ + entry##label: {} + +template <class Item> struct PiList +{ /* Cursor over a DList, starting at its head and advancing through Item::next. */ + PiList() + : ptr(0) {} + + PiList( const DList<Item> &l ) + : ptr(l.head) {} + + PiList( Item *ptr ) + : ptr(ptr) {} + + operator Item *() const { return ptr; } + Item *operator->() const { return ptr; } + + bool end() { return ptr == 0; } + void clear() { ptr = 0; } + + PiList next() /* Dereferences ptr, so it must not be called on an ended cursor. */ + { return PiList( ptr->next ); } + + Item *ptr; +}; + +template <class Item> struct PiSingle +{ /* Cursor over at most one item. */ + PiSingle() + : ptr(0) {} + + PiSingle( Item *ptr ) + : ptr(ptr) {} + + operator Item *() const { return ptr; } + Item *operator->() const { return ptr; } + + bool end() { return ptr == 0; } + void clear() { ptr = 0; } + + /* Next is always nil. */ + PiSingle next() + { return PiSingle( 0 ); } + + Item *ptr; +}; + +template <class Item> struct PiVector +{ /* Cursor over a contiguous run of items, tracked as a pointer plus a remaining length. */ + PiVector() + : ptr(0), length(0) {} + + PiVector( const Vector<Item> &v ) + : ptr(v.data), length(v.length()) {} + + PiVector( Item *ptr, long length ) + : ptr(ptr), length(length) {} + + operator Item *() const { return ptr; } + Item *operator->() const { return ptr; } + + bool end() { return length == 0; } /* The end test uses the remaining length, not ptr. */ + void clear() { ptr = 0; length = 0; } + + PiVector next() + { return PiVector( ptr + 1, length - 1 ); } + + Item *ptr; + long length; +}; + + +template <class ItemIter1, class ItemIter2 = ItemIter1> struct ValPairIter +{ + /* Encodes the states that are meaningful to the caller of the iterator. 
*/ + enum UserState + { + RangeInS1, RangeInS2, + RangeOverlap, + }; + + /* Encodes the different states that an fsm iterator can be in. */ + enum IterState { + Begin, + ConsumeS1Range, ConsumeS2Range, + OnlyInS1Range, OnlyInS2Range, + ExactOverlap, End + }; + + ValPairIter( const ItemIter1 &list1, const ItemIter2 &list2 ); + + /* The current item of one list, its key, and the item that follows it. */ + template <class ItemIter> struct NextTrans + { + CondKey key; + ItemIter trans; + ItemIter next; + + NextTrans() { key = 0; } + + /* Refresh next and key from trans. At the end of the list only next + * is cleared; key keeps its previous value. */ + void load() { + if ( trans.end() ) + next.clear(); + else { + next = trans->next; + key = trans->key; + } + } + + void set( const ItemIter &t ) { + trans = t; + load(); + } + + void increment() { + trans = next; + load(); + } + }; + + /* Query iterator. */ + bool lte() { return itState != End; } + bool end() { return itState == End; } + void operator++(int) { findNext(); } + void operator++() { findNext(); } + + /* Iterator state. */ + ItemIter1 list1; + ItemIter2 list2; + IterState itState; + UserState userState; + + NextTrans<ItemIter1> s1Tel; + NextTrans<ItemIter2> s2Tel; + Key bottomLow, bottomHigh; + ItemIter1 *bottomTrans1; + ItemIter2 *bottomTrans2; + +private: + void findNext(); +}; + +/* Init the iterator by advancing to the first item. Every member is given a + * definite value first, so that copying the iterator never reads an + * indeterminate one. When both lists are empty findNext() goes straight to + * End and userState keeps the value set here. */ +template <class ItemIter1, class ItemIter2> + ValPairIter<ItemIter1, ItemIter2>:: + ValPairIter( const ItemIter1 &list1, const ItemIter2 &list2 ) +: + list1(list1), + list2(list2), + itState(Begin), + userState(RangeInS1), + bottomTrans1(0), + bottomTrans2(0) +{ + bottomLow = 0; + bottomHigh = 0; + findNext(); +} + +/* Advance to the next transition. When returns, trans points to the next + * transition, unless there are no more, in which case end() returns true. */ +template <class ItemIter1, class ItemIter2> + void ValPairIter<ItemIter1, ItemIter2>::findNext() +{ + /* Jump into the iterator routine based on the iterator state. 
*/ + switch ( itState ) { + case Begin: goto entryBegin; + case ConsumeS1Range: goto entryConsumeS1Range; + case ConsumeS2Range: goto entryConsumeS2Range; + case OnlyInS1Range: goto entryOnlyInS1Range; + case OnlyInS2Range: goto entryOnlyInS2Range; + case ExactOverlap: goto entryExactOverlap; + case End: goto entryEnd; + } + +entryBegin: + /* Set up the next structs at the head of the transition lists. */ + s1Tel.set( list1 ); + s2Tel.set( list2 ); + + /* Concurrently scan both out ranges. */ + while ( true ) { + if ( s1Tel.trans.end() ) { + /* We are at the end of state1's ranges. Process the rest of + * state2's ranges. */ + while ( !s2Tel.trans.end() ) { + /* Range is only in s2. */ + CO_RETURN2( ConsumeS2Range, RangeInS2 ); + s2Tel.increment(); + } + break; + } + else if ( s2Tel.trans.end() ) { + /* We are at the end of state2's ranges. Process the rest of + * state1's ranges. */ + while ( !s1Tel.trans.end() ) { + /* Range is only in s1. */ + CO_RETURN2( ConsumeS1Range, RangeInS1 ); + s1Tel.increment(); + } + break; + } + /* Both state1's and state2's transition elements are good. + * Items here carry a single key each, so the keys are compared + * directly: the smaller one is reported alone. */ + else if ( s1Tel.key < s2Tel.key ) { + /* A range exists in state1 that does not overlap with state2. */ + CO_RETURN2( OnlyInS1Range, RangeInS1 ); + s1Tel.increment(); + } + else if ( s2Tel.key < s1Tel.key ) { + /* A range exists in state2 that does not overlap with state1. */ + CO_RETURN2( OnlyInS2Range, RangeInS2 ); + s2Tel.increment(); + } + else { + /* There is an exact overlap. */ + CO_RETURN2( ExactOverlap, RangeOverlap ); + + s1Tel.increment(); + s2Tel.increment(); + } + } + + /* Done, go into end state. */ + CO_RETURN( End ); +} + +template <class ItemIter1, class ItemIter2 = ItemIter1> struct RangePairIter +{ + /* Encodes the states that are meaningful to the caller of the iterator. 
*/ + enum UserState + { + RangeInS1, RangeInS2, + RangeOverlap, + BreakS1, BreakS2 + }; + + /* Encodes the different states that an fsm iterator can be in. */ + enum IterState { + Begin, + ConsumeS1Range, ConsumeS2Range, + OnlyInS1Range, OnlyInS2Range, + S1SticksOut, S1SticksOutBreak, + S2SticksOut, S2SticksOutBreak, + S1DragsBehind, S1DragsBehindBreak, + S2DragsBehind, S2DragsBehindBreak, + ExactOverlap, End + }; + + RangePairIter( FsmCtx *ctx, const ItemIter1 &list1, const ItemIter2 &list2 ); + + template <class ItemIter> struct NextTrans + { /* The current item of one list, its key range, and the item that follows it. */ + Key lowKey, highKey; + ItemIter trans; + ItemIter next; + + NextTrans() + { + highKey = 0; + lowKey = 0; + } + + void load() { /* At the end of the list only next is cleared; the keys keep their previous values. */ + if ( trans.end() ) + next.clear(); + else { + next = trans.next(); + lowKey = trans->lowKey; + highKey = trans->highKey; + } + } + + void set( const ItemIter &t ) { + trans = t; + load(); + } + + void increment() { + trans = next; + load(); + } + }; + + /* Query iterator. */ + bool lte() { return itState != End; } + bool end() { return itState == End; } + void operator++(int) { findNext(); } + void operator++() { findNext(); } + + FsmCtx *ctx; + + /* Iterator state. */ + ItemIter1 list1; + ItemIter2 list2; + IterState itState; + UserState userState; /* Not set by the constructor itself; written by CO_RETURN2 inside findNext(). */ + + NextTrans<ItemIter1> s1Tel; + NextTrans<ItemIter2> s2Tel; + Key bottomLow, bottomHigh; + ItemIter1 bottomTrans1; + ItemIter2 bottomTrans2; + +private: + void findNext(); +}; + +/* Init the iterator by advancing to the first item. */ +template <class ItemIter1, class ItemIter2> RangePairIter<ItemIter1, ItemIter2>:: + RangePairIter( FsmCtx *ctx, const ItemIter1 &list1, const ItemIter2 &list2 ) +: + ctx(ctx), + list1(list1), + list2(list2), + itState(Begin) +{ + bottomLow = 0; + bottomHigh = 0; + findNext(); +} + +/* Advance to the next transition. When returns, trans points to the next + * transition, unless there are no more, in which case end() returns true. 
*/ +template <class ItemIter1, class ItemIter2> + void RangePairIter<ItemIter1, ItemIter2>::findNext() +{ + /* Jump into the iterator routine based on the iterator state. */ + switch ( itState ) { + case Begin: goto entryBegin; + case ConsumeS1Range: goto entryConsumeS1Range; + case ConsumeS2Range: goto entryConsumeS2Range; + case OnlyInS1Range: goto entryOnlyInS1Range; + case OnlyInS2Range: goto entryOnlyInS2Range; + case S1SticksOut: goto entryS1SticksOut; + case S1SticksOutBreak: goto entryS1SticksOutBreak; + case S2SticksOut: goto entryS2SticksOut; + case S2SticksOutBreak: goto entryS2SticksOutBreak; + case S1DragsBehind: goto entryS1DragsBehind; + case S1DragsBehindBreak: goto entryS1DragsBehindBreak; + case S2DragsBehind: goto entryS2DragsBehind; + case S2DragsBehindBreak: goto entryS2DragsBehindBreak; + case ExactOverlap: goto entryExactOverlap; + case End: goto entryEnd; + } + +entryBegin: + /* Set up the next structs at the head of the transition lists. */ + s1Tel.set( list1 ); + s2Tel.set( list2 ); + + /* Concurrently scan both out ranges. */ + while ( true ) { + if ( s1Tel.trans.end() ) { + /* We are at the end of state1's ranges. Process the rest of + * state2's ranges. */ + while ( !s2Tel.trans.end() ) { + /* Range is only in s2. */ + CO_RETURN2( ConsumeS2Range, RangeInS2 ); + s2Tel.increment(); + } + break; + } + else if ( s2Tel.trans.end() ) { + /* We are at the end of state2's ranges. Process the rest of + * state1's ranges. */ + while ( !s1Tel.trans.end() ) { + /* Range is only in s1. */ + CO_RETURN2( ConsumeS1Range, RangeInS1 ); + s1Tel.increment(); + } + break; + } + /* Both state1's and state2's transition elements are good. + * The signature of no overlap is a back key being in front of a + * front key. */ + else if ( ctx->keyOps->lt( s1Tel.highKey, s2Tel.lowKey ) ) { + /* A range exists in state1 that does not overlap with state2. 
*/ + CO_RETURN2( OnlyInS1Range, RangeInS1 ); + s1Tel.increment(); + } + else if ( ctx->keyOps->lt( s2Tel.highKey, s1Tel.lowKey ) ) { + /* A range exists in state2 that does not overlap with state1. */ + CO_RETURN2( OnlyInS2Range, RangeInS2 ); + s2Tel.increment(); + } + /* There is overlap, must mix the ranges in some way. */ + else if ( ctx->keyOps->lt( s1Tel.lowKey, s2Tel.lowKey ) ) { + /* Range from state1 sticks out front. Must break it into + * non-overlapping and overlapping segments. */ + bottomLow = s2Tel.lowKey; + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.lowKey; + ctx->keyOps->decrement( s1Tel.highKey ); + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1SticksOutBreak, BreakS1 ); + + /* Broken off range is only in s1. */ + CO_RETURN2( S1SticksOut, RangeInS1 ); + + /* Advance over the part sticking out front. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + } + else if ( ctx->keyOps->lt( s2Tel.lowKey, s1Tel.lowKey ) ) { + /* Range from state2 sticks out front. Must break it into + * non-overlapping and overlapping segments. */ + bottomLow = s1Tel.lowKey; + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.lowKey; + ctx->keyOps->decrement( s2Tel.highKey ); + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2SticksOutBreak, BreakS2 ); + + /* Broken off range is only in s2. */ + CO_RETURN2( S2SticksOut, RangeInS2 ); + + /* Advance over the part sticking out front. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + } + /* Low ends are even. Are the high ends even? */ + else if ( ctx->keyOps->lt( s1Tel.highKey, s2Tel.highKey ) ) { + /* Range from state2 goes longer than the range from state1. 
We + * must break the range from state2 into an evenly overlapping + * segment. */ + bottomLow = s1Tel.highKey; + ctx->keyOps->increment( bottomLow ); + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.highKey; + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2DragsBehindBreak, BreakS2 ); + + /* Breaking s2 produces exact overlap. */ + CO_RETURN2( S2DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 2. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + + /* Advance over the entire s1Tel. We have consumed it. */ + s1Tel.increment(); + } + else if ( ctx->keyOps->lt( s2Tel.highKey, s1Tel.highKey ) ) { + /* Range from state1 goes longer than the range from state2. We + * must break the range from state1 into an evenly overlapping + * segment. */ + bottomLow = s2Tel.highKey; + ctx->keyOps->increment( bottomLow ); + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.highKey; + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1DragsBehindBreak, BreakS1 ); + + /* Breaking s1 produces exact overlap. */ + CO_RETURN2( S1DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 1. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + + /* Advance over the entire s2Tel. We have consumed it. */ + s2Tel.increment(); + } + else { + /* There is an exact overlap. */ + CO_RETURN2( ExactOverlap, RangeOverlap ); + + s1Tel.increment(); + s2Tel.increment(); + } + } + + /* Done, go into end state. */ + CO_RETURN( End ); +} + + +/* Compare lists of epsilon transitions. Entries are name ids of targets. */ +typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans; + +/* Compare class for the Approximate minimization. 
*/ +class ApproxCompare +{ +public: + ApproxCompare( FsmCtx *ctx = 0 ) : ctx(ctx) { } + int compare( const StateAp *pState1, const StateAp *pState2 ); + FsmCtx *ctx; +}; + +/* Compare class for the initial partitioning of a partition minimization. */ +class InitPartitionCompare +{ +public: + InitPartitionCompare( FsmCtx *ctx = 0 ) : ctx(ctx) { } + int compare( const StateAp *pState1, const StateAp *pState2 ); + FsmCtx *ctx; +}; + +/* Compare class for the regular partitioning of a partition minimization. */ +class PartitionCompare +{ +public: + PartitionCompare( FsmCtx *ctx = 0 ) : ctx(ctx) { } + int compare( const StateAp *pState1, const StateAp *pState2 ); + FsmCtx *ctx; +}; + +/* Compare class for a minimization that marks pairs. Provides the shouldMark + * routine. Unlike the compare classes above, the context argument has no + * default. */ +class MarkCompare +{ +public: + MarkCompare( FsmCtx *ctx ) : ctx(ctx) { } + bool shouldMark( MarkIndex &markIndex, const StateAp *pState1, + const StateAp *pState2 ); + FsmCtx *ctx; +}; + +/* List of partitions. */ +typedef DList< MinPartition > PartitionList; + +/* List of transitions out of a state. */ +typedef Vector<TransEl> TransListVect; + +/* Entry point map used for keeping track of entry points in a machine. */ +typedef BstSet< int > EntryIdSet; +typedef BstMapEl< int, StateAp* > EntryMapEl; +typedef BstMap< int, StateAp* > EntryMap; +typedef Vector<EntryMapEl> EntryMapBase; + +struct BreadthCost +{ /* A named cost value. */ + BreadthCost( std::string name, double cost ) + : name(name), cost(cost) {} + + std::string name; + double cost; +}; + +struct BreadthResult +{ /* A start value plus a vector of BreadthCost entries, which starts empty. */ + BreadthResult( double start ) : start(start) {} + + double start; + Vector<BreadthCost> costs; +}; + +/* Result of an operation. 
*/
+/* Tagged result returned by the FsmAp operators. Each constructor takes an
+ * empty tag struct that selects the outcome. Only the Fsm outcome carries a
+ * machine pointer; only PriorInteraction and CondCostTooHigh carry a
+ * meaningful id. Every constructor initializes all three members so that a
+ * result can be copied and inspected without reading an indeterminate id. */
+struct FsmRes
+{
+	/* Constructor tags. */
+	struct Fsm {};
+	struct TooManyStates {};
+	struct PriorInteraction {};
+	struct CondCostTooHigh {};
+	struct InternalError {};
+
+	enum Type
+	{
+		TypeFsm = 1,
+		TypeTooManyStates,
+		TypePriorInteraction,
+		TypeCondCostTooHigh,
+		TypeInternalError,
+	};
+
+	/* Successful result wrapping fsm. */
+	FsmRes( const Fsm &, FsmAp *fsm )
+		: fsm(fsm), type(TypeFsm), id(0) {}
+
+	FsmRes( const TooManyStates & )
+		: fsm(0), type(TypeTooManyStates), id(0) {}
+
+	/* Failure carrying the id of the guard involved. */
+	FsmRes( const PriorInteraction &, long long guardId )
+		: fsm(0), type(TypePriorInteraction), id(guardId) {}
+
+	/* Failure carrying the id of the cost involved. */
+	FsmRes( const CondCostTooHigh &, long long costId )
+		: fsm(0), type(TypeCondCostTooHigh), id(costId) {}
+
+	FsmRes( const InternalError & )
+		: fsm(0), type(TypeInternalError), id(0) {}
+
+	/* True when a machine pointer is present. */
+	bool success()
+		{ return fsm != 0; }
+
+	/* Both accessors yield the machine only for TypeFsm, otherwise null. */
+	operator FsmAp*()
+		{ return type == TypeFsm ? fsm : 0; }
+	FsmAp *operator->()
+		{ return type == TypeFsm ? fsm : 0; }
+
+	FsmAp *fsm;
+	Type type;
+	long long id;
+};
+
+/* Graph class that implements actions and priorities. */
+struct FsmAp
+{
+	/* Constructors/Destructors. */
+	FsmAp( FsmCtx *ctx );
+	FsmAp( const FsmAp &graph );
+	~FsmAp();
+
+	FsmCtx *ctx;
+
+	bool priorInteraction;
+	int guardId;
+
+	/* The list of states. */
+	StateList stateList;
+	StateList misfitList;
+	NfaStateList nfaList;
+	StateDict stateDict;
+
+	/* The map of entry points. */
+	EntryMap entryPoints;
+
+	/* The start state. */
+	StateAp *startState;
+
+	/* Error state, possibly created only when the final machine has been
+	 * created and the XML machine is about to be written. No transitions
+	 * point to this state. */
+	StateAp *errState;
+
+	/* The set of final states. */
+	StateSet finStateSet;
+
+	/* Misfit Accounting. Are misfits put on a separate list. */
+	bool misfitAccounting;
+
+	/*
+	 * Transition actions and priorities.
+	 */
+
+	/* Set priorities on transitions. */
+	void startFsmPrior( int ordering, PriorDesc *prior );
+	void allTransPrior( int ordering, PriorDesc *prior );
+	void finishFsmPrior( int ordering, PriorDesc *prior );
+	void leaveFsmPrior( int ordering, PriorDesc *prior );
+
+	/* Action setting support. */
+	void transferOutActions( StateAp *state );
+	void transferErrorActions( StateAp *state, int transferPoint );
+	void setErrorActions( StateAp *state, const ActionTable &other );
+	void setErrorAction( StateAp *state, int ordering, Action *action );
+
+	/* Fill all spaces in a transition list with an error transition. */
+	void fillGaps( StateAp *state );
+
+	/* Similar to setErrorAction, instead gives a state to go to on error. */
+	void setErrorTarget( StateAp *state, StateAp *target, int *orderings,
+			Action **actions, int nActs );
+
+	/* Set actions to execute. */
+	void startFsmAction( int ordering, Action *action );
+	void allTransAction( int ordering, Action *action );
+	void finishFsmAction( int ordering, Action *action );
+	void leaveFsmAction( int ordering, Action *action );
+	void longMatchAction( int ordering, LongestMatchPart *lmPart );
+
+	/* Set conditions. */
+	CondSpace *addCondSpace( const CondSet &condSet );
+
+	void convertToCondAp( StateAp *state );
+
+private:
+	/* Can generate states. */
+	void doEmbedCondition( StateAp *state,
+			const CondSet &set, const CondKeySet &vals );
+
+
+public:
+	static FsmRes embedCondition( FsmAp *fsm, StateAp *state, const CondSet &set,
+			const CondKeySet &vals );
+
+	FsmRes startFsmCondition( Action *condAction, bool sense );
+	void allTransCondition( Action *condAction, bool sense );
+	void leaveFsmCondition( Action *condAction, bool sense );
+
+	/* Set error actions to execute. */
+	void startErrorAction( int ordering, Action *action, int transferPoint );
+	void allErrorAction( int ordering, Action *action, int transferPoint );
+	void finalErrorAction( int ordering, Action *action, int transferPoint );
+	void notStartErrorAction( int ordering, Action *action, int transferPoint );
+	void notFinalErrorAction( int ordering, Action *action, int transferPoint );
+	void middleErrorAction( int ordering, Action *action, int transferPoint );
+
+	/* Set EOF actions. */
+	void startEOFAction( int ordering, Action *action );
+	void allEOFAction( int ordering, Action *action );
+	void finalEOFAction( int ordering, Action *action );
+	void notStartEOFAction( int ordering, Action *action );
+	void notFinalEOFAction( int ordering, Action *action );
+	void middleEOFAction( int ordering, Action *action );
+
+	/* Set To State actions. */
+	void startToStateAction( int ordering, Action *action );
+	void allToStateAction( int ordering, Action *action );
+	void finalToStateAction( int ordering, Action *action );
+	void notStartToStateAction( int ordering, Action *action );
+	void notFinalToStateAction( int ordering, Action *action );
+	void middleToStateAction( int ordering, Action *action );
+
+	/* Set From State actions. */
+	void startFromStateAction( int ordering, Action *action );
+	void allFromStateAction( int ordering, Action *action );
+	void finalFromStateAction( int ordering, Action *action );
+	void notStartFromStateAction( int ordering, Action *action );
+	void notFinalFromStateAction( int ordering, Action *action );
+	void middleFromStateAction( int ordering, Action *action );
+
+	/* Shift the action ordering of the start transitions to start at
+	 * fromOrder and increase in units of 1. Useful before kleene star
+	 * operation. */
+	int shiftStartActionOrder( int fromOrder );
+
+	/* Clear all priorities from the fsm so they won't affect minimization
+	 * of the final fsm. */
+	void clearAllPriorities();
+
+	/* Zero out all the function keys. */
+	void nullActionKeys();
+
+	/* Walk the list of states and verify state properties. */
+	void verifyStates();
+
+	/* Misfit Accounting. Are misfits put on a separate list. */
+	void setMisfitAccounting( bool val )
+		{ misfitAccounting = val; }
+
+	/* Set and Unset a state as final. */
+	void setFinState( StateAp *state );
+	void unsetFinState( StateAp *state );
+
+	void setStartState( StateAp *state );
+	void unsetStartState( );
+
+	/* Set and unset a state as an entry point. */
+	void setEntry( int id, StateAp *state );
+	void changeEntry( int id, StateAp *to, StateAp *from );
+	void unsetEntry( int id, StateAp *state );
+	void unsetEntry( int id );
+	void unsetAllEntryPoints();
+
+	/* Epsilon transitions. */
+	void epsilonTrans( int id );
+
+	void checkEpsilonRegularInteraction( const PriorTable &t1, const PriorTable &t2 );
+
+private:
+	/* Can generate states. */
+	void shadowReadWriteStates();
+
+	void afterOpMinimize( bool lastInSeq = true );
+
+	void removeDups( ActionTable &table );
+
+public:
+
+	void removeActionDups();
+
+	/*
+	 * Basic attaching and detaching.
+	 */
+
+	/* Common to attaching/detaching list and default. */
+	template < class Head > void attachToInList( StateAp *from,
+			StateAp *to, Head *&head, Head *trans );
+	template < class Head > void detachFromInList( StateAp *from,
+			StateAp *to, Head *&head, Head *trans );
+
+	void attachToNfa( StateAp *from, StateAp *to, NfaTrans *nfaTrans );
+	void detachFromNfa( StateAp *from, StateAp *to, NfaTrans *nfaTrans );
+
+	void attachStateDict( StateAp *from, StateAp *to );
+	void detachStateDict( StateAp *from, StateAp *to );
+
+	/* Attach with a new transition. */
+	CondAp *attachNewCond( TransAp *trans, StateAp *from,
+			StateAp *to, CondKey onChar );
+	TransAp *attachNewTrans( StateAp *from, StateAp *to,
+			Key onChar1, Key onChar2 );
+
+	/* Attach with an existing transition that is already in an out list. */
+	void attachTrans( StateAp *from, StateAp *to, TransDataAp *trans );
+	void attachTrans( StateAp *from, StateAp *to, CondAp *trans );
+
+	/* Redirect a transition away from error and towards some state. */
+	void redirectErrorTrans( StateAp *from, StateAp *to, TransDataAp *trans );
+	void redirectErrorTrans( StateAp *from, StateAp *to, CondAp *trans );
+
+	/* Detach a transition from a target state. */
+	void detachTrans( StateAp *from, StateAp *to, TransDataAp *trans );
+	void detachTrans( StateAp *from, StateAp *to, CondAp *trans );
+
+	/* Detach a state from the graph. */
+	void detachState( StateAp *state );
+
+	/*
+	 * NFA to DFA conversion routines.
+	 */
+
+	/* Duplicate a transition that will dropin to a free spot. */
+	TransDataAp *dupTransData( StateAp *from, TransDataAp *srcTrans );
+	TransAp *dupTrans( StateAp *from, TransAp *srcTrans );
+	CondAp *dupCondTrans( StateAp *from, TransAp *destParent, CondAp *srcTrans );
+
+private:
+	/* In crossing, two transitions both go to real states. Can generate
+	 * states. */
+	template< class Trans > Trans *fsmAttachStates(
+			StateAp *from, Trans *destTrans, Trans *srcTrans );
+
+public:
+	void expandConds( StateAp *fromState, TransAp *trans,
+			CondSpace *fromSpace, CondSpace *mergedSpace );
+	TransAp *copyTransForExpansion( StateAp *fromState, TransAp *srcTrans );
+	StateAp *copyStateForExpansion( StateAp *srcState );
+	void freeEffectiveTrans( TransAp *srcTrans );
+
+private:
+	/* Two transitions are to be crossed, handle the possibility of either
+	 * going to the error state. Can generate states. */
+	template< class Trans > Trans *mergeTrans( StateAp *from,
+			Trans *destTrans, Trans *srcTrans );
+
+public:
+	/* Compare to determine relative priorities of two transition tables. */
+	int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 );
+
+	void addOutCondition( StateAp *state, Action *condAction, bool sense );
+
+	void expandCondKeys( CondKeySet &condKeys, CondSpace *fromSpace,
+			CondSpace *mergedSpace );
+
+	/* Back to trans ap (minimization) */
+	TransDataAp *convertToTransAp( StateAp *from, CondAp *cond );
+
+	/* Cross a src transition with one that is already occupying a spot. */
+	TransCondAp *convertToCondAp( StateAp *state, TransDataAp *trans );
+	CondSpace *expandCondSpace( TransAp *destTrans, TransAp *srcTrans );
+
+private:
+	/* Can generate states. */
+	TransAp *crossTransitions( StateAp *from,
+			TransAp *destTrans, TransAp *srcTrans );
+	TransDataAp *crossTransitionsBothPlain( StateAp *from,
+			TransDataAp *destTrans, TransDataAp *srcTrans );
+	CondAp *crossCondTransitions( StateAp *from,
+			TransAp *destParent, CondAp *destTrans, CondAp *srcTrans );
+
+public:
+	void prepareNfaRound();
+	void finalizeNfaRound();
+
+	void outTransCopy( StateAp *dest, TransAp *srcList );
+	void nfaMergeStates( StateAp *destState, StateAp **srcStates, int numSrc );
+	void mergeOutConds( StateAp *destState, StateAp *srcState, bool leaving = false );
+	void checkPriorInteractions( StateAp *destState, StateAp *srcState );
+	void mergeNfaTransitions( StateAp *destState, StateAp *srcState );
+	void mergeStateProperties( StateAp *destState, StateAp *srcState );
+	void mergeStatesLeaving( StateAp *destState, StateAp *srcState );
+	void mergeStateBits( StateAp *destState, StateAp *srcState );
+	void mergeStates( StateAp *destState, StateAp *srcState, bool leaving = false );
+
+	/* Merge a set of states into destState. */
+	void mergeStateList( StateAp *destState, StateAp **srcStates, int numSrc );
+
+	/* Make all states that are combinations of other states and that
+	 * have not yet had their out transitions filled in. This will
+	 * empty out stateDict and stFil. */
+	void cleanAbortedFill( StateAp *state );
+	void cleanAbortedFill();
+	bool overStateLimit();
+	void nfaFillInStates();
+
+	/*
+	 * Transition Comparison.
+	 */
+
+	template< class Trans > int compareCondBitElim( Trans *trans1, Trans *trans2 );
+	template< class Trans > int compareCondBitElimPtr( Trans *trans1, Trans *trans2 );
+	int compareCondListBitElim( const CondList &condList1, const CondList &condList2 );
+
+	/* Compare priority and function table of transitions. */
+	static int compareTransData( TransAp *trans1, TransAp *trans2 );
+	template< class Trans > static int compareCondData( Trans *trans1, Trans *trans2 );
+
+	/* Compare transition data. Either of the pointers may be null. */
+	static int compareTransDataPtr( TransAp *trans1, TransAp *trans2 );
+	template< class Trans > static int compareCondDataPtr( Trans *trans1, Trans *trans2 );
+
+	/* Compare target state and transition data. Either pointer may be null. */
+	static int compareFullPtr( TransAp *trans1, TransAp *trans2 );
+
+	/* Compare target partitions. Either pointer may be null. */
+	static int compareTransPartPtr( TransAp *trans1, TransAp *trans2 );
+	template< class Trans > static int compareCondPartPtr( Trans *trans1, Trans *trans2 );
+
+	static int comparePart( TransAp *trans1, TransAp *trans2 );
+
+	/* Check marked status of target states. Either pointer may be null. */
+	static bool shouldMarkPtr( MarkIndex &markIndex,
+			TransAp *trans1, TransAp *trans2 );
+
+	/*
+	 * Callbacks.
+	 */
+
+	/* Add in the properties of srcTrans into this. */
+	template< class Trans > void addInTrans( Trans *destTrans, Trans *srcTrans );
+
+	/* Compare states on data stored in the states. */
+	static int compareStateData( const StateAp *state1, const StateAp *state2 );
+
+	/* Out transition data. */
+	void clearOutData( StateAp *state );
+	bool hasOutData( StateAp *state );
+	void transferOutData( StateAp *destState, StateAp *srcState );
+
+	/*
+	 * Allocation.
+	 */
+
+	/* New up a state and add it to the graph. */
+	StateAp *addState();
+
+	/*
+	 * Building basic machines
+	 */
+
+	static FsmAp *concatFsm( FsmCtx *ctx, Key c );
+	static FsmAp *concatFsmCI( FsmCtx *ctx, Key c );
+	static FsmAp *concatFsm( FsmCtx *ctx, Key *str, int len );
+	static FsmAp *concatFsmCI( FsmCtx *ctx, Key *str, int len );
+	static FsmAp *orFsm( FsmCtx *ctx, Key *set, int len );
+	static FsmAp *rangeFsm( FsmCtx *ctx, Key low, Key high );
+	static FsmAp *rangeFsmCI( FsmCtx *ctx, Key low, Key high );
+	static FsmAp *rangeStarFsm( FsmCtx *ctx, Key low, Key high );
+	static FsmAp *emptyFsm( FsmCtx *ctx );
+	static FsmAp *lambdaFsm( FsmCtx *ctx );
+	static FsmAp *dotFsm( FsmCtx *ctx );
+	static FsmAp *dotStarFsm( FsmCtx *ctx );
+	static FsmAp *notRangeFsm( FsmCtx *ctx, Key low, Key high );
+
+	/*
+	 * Fsm operators.
+	 */
+
+	static FsmRes starOp( FsmAp *fsm );
+	static FsmRes plusOp( FsmAp *fsm );
+	static FsmRes questionOp( FsmAp *fsm );
+
+	static FsmRes exactRepeatOp( FsmAp *fsm, int times );
+	static FsmRes maxRepeatOp( FsmAp *fsm, int times );
+	static FsmRes minRepeatOp( FsmAp *fsm, int times );
+	static FsmRes rangeRepeatOp( FsmAp *fsm, int lower, int upper );
+
+	static FsmRes concatOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true,
+			StateSet *fromStates = 0, bool optional = false );
+	static FsmRes unionOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true );
+	static FsmRes intersectOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true );
+	static FsmRes subtractOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true );
+	static FsmRes epsilonOp( FsmAp *fsm );
+	static FsmRes joinOp( FsmAp *fsm, int startId, int finalId, FsmAp **others, int numOthers );
+
+	static FsmRes rightStartConcatOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true );
+
+	void transferOutToNfaTrans( NfaTrans *trans, StateAp *state );
+
+	enum NfaRepeatMode {
+		NfaLegacy = 1,
+		NfaGreedy,
+		NfaLazy
+	};
+
+	static FsmRes applyNfaTrans( FsmAp *fsm, StateAp *fromState, StateAp *toState, NfaTrans *nfaTrans );
+
+	/* Results in an NFA. */
+	static FsmRes nfaUnionOp( FsmAp *fsm, FsmAp **others, int n, int depth, std::ostream &stats );
+	static FsmRes nfaRepeatOp( FsmAp *fsm, Action *push, Action *pop, Action *init,
+			Action *stay, Action *repeat, Action *exit );
+
+	static FsmRes nfaRepeatOp2( FsmAp *fsm, Action *push, Action *pop, Action *init,
+			Action *stay, Action *repeat, Action *exit, NfaRepeatMode mode = NfaGreedy );
+	static FsmRes nfaWrap( FsmAp *fsm, Action *push, Action *pop, Action *init,
+			Action *stay, Action *exit, NfaRepeatMode mode = NfaGreedy );
+
+	static FsmRes nfaUnion( const NfaRoundVect &roundsList, FsmAp **machines,
+			int numMachines, std::ostream &stats, bool printStatistics );
+
+	static FsmRes condPlus( FsmAp *fsm, long repId, Action *ini, Action *inc, Action *min, Action *max );
+	static FsmRes condStar( FsmAp *fsm, long repId, Action *ini, Action *inc, Action *min, Action *max );
+
+	/* Make a new start state that has no entry points. Will not change the
+	 * meaning of the fsm. */
+	static FsmRes isolateStartState( FsmAp *fsm );
+
+	/*
+	 * Analysis Functions
+	 */
+	static FsmRes condCostFromState( FsmAp *fsm, StateAp *state, long depth );
+	static FsmRes condCostSearch( FsmAp *fsm );
+	static void breadthFromEntry( double &total, int &minDepth, double *histogram, FsmAp *fsm, StateAp *state );
+	static void breadthFromState( double &total, int &minDepth, double *histogram, FsmAp *fsm, StateAp *state,
+			long depth, int maxDepth, double stateScore);
+
+	/*
+	 * Operator workers
+	 */
+	void globOp( FsmAp **others, int numOthers );
+	void deterministicEntry();
+
+	/* Determine if there are any entry points into a start state other than
+	 * the start state. */
+	bool isStartStateIsolated();
+
+	/* Make a new start state that has no entry points. Will not change the
+	 * meaning of the fsm. */
+	StateAp *dupStartState();
+
+	/* Workers for resolving epsilon transitions. */
+	bool inEptVect( EptVect *eptVect, StateAp *targ );
+	void epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving );
+	void resolveEpsilonTrans();
+
+	static bool fillAbort( FsmRes &res, FsmAp *fsm );
+
+	static FsmRes fillInStates( FsmAp *fsm );
+
+	/* Workers for concatenation and union. */
+	static FsmRes doUnion( FsmAp *fsm, FsmAp *other );
+	static FsmRes doConcat( FsmAp *fsm, FsmAp *other, StateSet *fromStates, bool optional );
+
+	static void condCost( Action *action, long repId );
+	static void applyEntryPriorGuard( FsmAp *fsm, long repId );
+	static void applyRepeatPriorGuard( FsmAp *fsm, long repId );
+
+	/*
+	 * Final states
+	 */
+
+	/* Unset any final states that are no longer to be final
+	 * due to final bits. */
+	void unsetIncompleteFinals();
+	void unsetKilledFinals();
+
+	/* Bring in other's entry points. Assumes others states are going to be
+	 * copied into this machine. */
+	void copyInEntryPoints( FsmAp *other );
+
+	/* Ordering states. */
+	void depthFirstOrdering( StateAp *state );
+	void depthFirstOrdering();
+	void sortStatesByFinal();
+
+	/* Set sequential state numbers starting at 0. */
+	void setStateNumbers( int base );
+
+	/* Unset all final states. */
+	void unsetAllFinStates();
+
+	/* Set the bits of final states and clear the bits of non final states. */
+	void setFinBits( int finStateBits );
+	void unsetFinBits( int finStateBits );
+
+	/*
+	 * Self-consistency checks.
+	 */
+
+	/* Run a sanity check on the machine. */
+	void verifyIntegrity();
+
+	/* Verify that there are no unreachable states, or dead end states. */
+	void verifyReachability();
+	void verifyNoDeadEndStates();
+
+	/*
+	 * Path pruning
+	 */
+
+	/* Mark all states reachable from state. */
+	void markReachableFromHereReverse( StateAp *state );
+
+	/* Mark all states reachable from state. */
+	void markReachableFromHere( StateAp *state );
+	void markReachableFromHereStopFinal( StateAp *state );
+
+	/* Any transitions to another state? */
+	bool anyRegularTransitions( StateAp *state );
+
+	/* Removes states that cannot be reached by any path in the fsm and are
+	 * thus wasted silicon. */
+	void removeDeadEndStates();
+
+	/* Removes states that cannot be reached by any path in the fsm and are
+	 * thus wasted silicon. */
+	long removeUnreachableStates();
+
+	/* Remove error actions from states on which the error transition will
+	 * never be taken. */
+	bool outListCovers( StateAp *state );
+	bool anyErrorRange( StateAp *state );
+
+	/* Remove states that are on the misfit list. */
+	void removeMisfits();
+
+	/*
+	 * FSM Minimization
+	 */
+
+	/* Minimization by partitioning. */
+	void minimizePartition1();
+	void minimizePartition2();
+
+	/* Minimize the final state Machine. The result is the minimal fsm. Slow
+	 * but stable, correct minimization. Uses n^2 space (lookout) and average
+	 * n^2 time. Worst case n^3 time, but that is a very rare case. */
+	void minimizeStable();
+
+	/* Minimize the final state machine. Does not find the minimal fsm, but a
+	 * pretty good approximation. Does not use any extra space. Average n^2
+	 * time. Worst case n^3 time, but that is a very rare case. */
+	void minimizeApproximate();
+
+	/* This is the worker for the minimize approximate solution. It merges
+	 * states that have identical out transitions. */
+	bool minimizeRound( );
+
+	/* Given an initial partitioning of states, split partitions that have out trans
+	 * to differing partitions. */
+	int partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts );
+
+	/* Split partitions that have a transition to a previously split partition, until
+	 * there are no more partitions to split. */
+	int splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts );
+
+	/* Fuse together states in the same partition. */
+	void fusePartitions( MinPartition *parts, int numParts );
+
+	/* Mark pairs where out final stateness differs, out trans data differs,
+	 * trans pairs go to a marked pair or trans data differs. Should get
+	 * a lot of pairs. */
+	void initialMarkRound( MarkIndex &markIndex );
+
+	/* One marking round on all state pairs. Considers if trans pairs go
+	 * to a marked state only. Returns whether or not a pair was marked. */
+	bool markRound( MarkIndex &markIndex );
+
+	/* Move the in trans into src into dest. */
+	void moveInwardTrans(StateAp *dest, StateAp *src);
+
+	/* Make state src and dest the same state. */
+	void fuseEquivStates( StateAp *dest, StateAp *src );
+
+	/* Find any states that didn't get marked by the marking algorithm and
+	 * merge them into the primary states of their equivalence class. */
+	void fuseUnmarkedPairs( MarkIndex &markIndex );
+
+	/* Merge neighboring transitions that go to the same state and have the
+	 * same transition data. */
+	void compressTransitions();
+
+	/* Returns true if there is a transition (either explicit or by a gap) to
+	 * the error state. */
+	bool checkErrTrans( StateAp *state, TransAp *trans );
+	bool checkErrTrans( StateAp *state, CondAp *trans );
+	bool checkErrTransFinish( StateAp *state );
+	bool hasErrorTrans();
+
+	/* Check if a machine defines a single character. This is useful in
+	 * validating ranges and machines to export. */
+	bool checkSingleCharMachine( );
+
+	bool elimCondBits();
+};
+
+/* Callback invoked when another trans (or possibly this) is added into this
+ * transition during the merging process. Draw in any properties of srcTrans
+ * into this transition. AddInTrans is called when a new transition is made
+ * that will be a duplicate of another transition or a combination of several
+ * other transitions. AddInTrans will be called for each transition that the
+ * new transition is to represent. 
*/ +template< class Trans > void FsmAp::addInTrans( Trans *destTrans, Trans *srcTrans ) +{ + /* Protect against adding in from ourselves. */ + if ( srcTrans == destTrans ) { + /* Adding in ourselves, need to make a copy of the source transitions. + * The priorities are not copied in as that would have no effect. */ + destTrans->lmActionTable.setActions( LmActionTable(srcTrans->lmActionTable) ); + destTrans->actionTable.setActions( ActionTable(srcTrans->actionTable) ); + } + else { + /* Not a copy of ourself, get the functions and priorities. */ + destTrans->lmActionTable.setActions( srcTrans->lmActionTable ); + destTrans->actionTable.setActions( srcTrans->actionTable ); + destTrans->priorTable.setPriors( srcTrans->priorTable ); + } +} + +/* Compares two transition pointers according to priority and functions. + * Either pointer may be null. Does not consider to state or from state. */ +template< class Trans > int FsmAp::compareCondDataPtr( Trans *trans1, Trans *trans2 ) +{ + if ( trans1 == 0 && trans2 != 0 ) + return -1; + else if ( trans1 != 0 && trans2 == 0 ) + return 1; + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. */ + int compareRes = compareCondData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + return 0; +} + +/* Compares two transition pointers according to priority and functions. + * Either pointer may be null. Does not consider to state or from state. */ +template< class Trans > int FsmAp::compareCondBitElimPtr( Trans *trans1, Trans *trans2 ) +{ + if ( trans1 == 0 && trans2 != 0 ) + return -1; + else if ( trans1 != 0 && trans2 == 0 ) + return 1; + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. 
*/ + int compareRes = compareCondBitElim( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + return 0; +} + +#endif diff --git a/libfsm/fsmmin.cc b/libfsm/fsmmin.cc new file mode 100644 index 00000000..cabe3968 --- /dev/null +++ b/libfsm/fsmmin.cc @@ -0,0 +1,934 @@ +/* + * Copyright 2002-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "fsmgraph.h" +#include "mergesort.h" + +struct MergeSortInitPartition + : public MergeSort<StateAp*, InitPartitionCompare> +{ + MergeSortInitPartition( FsmCtx *ctx ) + { + InitPartitionCompare::ctx = ctx; + } +}; + +struct MergeSortPartition + : public MergeSort<StateAp*, PartitionCompare> +{ + MergeSortPartition( FsmCtx *ctx ) + { + PartitionCompare::ctx = ctx; + } +}; + +struct MergeSortApprox + : public MergeSort<StateAp*, ApproxCompare> +{ + MergeSortApprox( FsmCtx *ctx ) + { + ApproxCompare::ctx = ctx; + } +}; + +int FsmAp::partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort object and a single partition compare. */ + MergeSortPartition mergeSort( ctx ); + PartitionCompare partCompare; + + /* For each partition. */ + for ( int p = 0; p < numParts; p++ ) { + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = parts[p].list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. */ + int numStates = parts[p].list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + int destPart = p, firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next avail spot. */ + destPart = numParts; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != p ) { + StateAp *state = parts[p].list.detach( statePtrs[s] ); + parts[destPart].list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transfered so the + * result of the sort is not altered. 
*/ + for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + } + + return numParts; +} + +/** + * \brief Minimize by partitioning version 1. + * + * Repeatedly tries to split partitions until all partitions are unsplittable. + * Produces the most minimal FSM possible. + */ +void FsmAp::minimizePartition1() +{ + /* Need one mergesort object and partition compares. */ + MergeSortInitPartition mergeSort( ctx ); + InitPartitionCompare initPartCompare( ctx ); + + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. This gives us an initial partitioning to work + * with. + */ + + /* Make a array of pointers to states. */ + int numStates = stateList.length(); + StateAp** statePtrs = new StateAp*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. */ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. 
*/ + stateList.abandon(); + + /* Split partitions. */ + int numParts = destPart + 1; + while ( true ) { + /* Test all partitions for splitting. */ + int newNum = partitionRound( statePtrs, parts, numParts ); + + /* When no partitions can be split, stop. */ + if ( newNum == numParts ) + break; + + numParts = newNum; + } + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + +/* Split partitions that need splittting, decide which partitions might need + * to be split as a result, continue until there are no more that might need + * to be split. */ +int FsmAp::splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort and a partition compare. */ + MergeSortPartition mergeSort( ctx ); + PartitionCompare partCompare( ctx ); + + /* The lists of unsplitable (partList) and splitable partitions. + * Only partitions in the splitable list are check for needing splitting. */ + PartitionList partList, splittable; + + /* Initially, all partitions are born from a split (the initial + * partitioning) and can cause other partitions to be split. So any + * partition with a state with a transition out to another partition is a + * candidate for splitting. This will make every partition except possibly + * partitions of final states split candidates. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume not active. */ + parts[p].active = false; + + /* Look for a trans out of any state in the partition. */ + for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) { + /* If there is at least one transition out to another state then + * the partition becomes splittable. */ + if ( state->outList.length() > 0 ) { + parts[p].active = true; + break; + } + } + + /* If it was found active then it goes on the splittable list. 
*/ + if ( parts[p].active ) + splittable.append( &parts[p] ); + else + partList.append( &parts[p] ); + } + + /* While there are partitions that are splittable, pull one off and try + * to split it. If it splits, determine which partitions may now be split + * as a result of the newly split partition. */ + while ( splittable.length() > 0 ) { + MinPartition *partition = splittable.detachFirst(); + + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = partition->list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. */ + int numStates = partition->list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + MinPartition *destPart = partition; + int firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next avail spot. */ + destPart = &parts[numParts]; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != partition ) { + StateAp *state = partition->list.detach( statePtrs[s] ); + destPart->list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transfered so the + * result of the sort is not altered. */ + int newPart; + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + + /* Put the partition we just split and any new partitions that came out + * of the split onto the inactive list. 
*/ + partition->active = false; + partList.append( partition ); + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + parts[newPart].active = false; + partList.append( &parts[newPart] ); + } + + if ( destPart == partition ) + continue; + + /* Now determine which partitions are splittable as a result of + * splitting partition by walking the in lists of the states in + * partitions that got split. Partition is the faked first item in the + * loop. */ + MinPartition *causalPart = partition; + newPart = firstNewPart - 1; + while ( newPart < numParts ) { + /* Loop all states in the causal partition. */ + StateList::Iter state = causalPart->list; + for ( ; state.lte(); state++ ) { + /* Walk all transitions into the state and put the partition + * that the from state is in onto the splittable list. */ + for ( TransInList::Iter t = state->inTrans; t.lte(); t++ ) { + MinPartition *fromPart = t->fromState->alg.partition; + if ( ! fromPart->active ) { + fromPart->active = true; + partList.detach( fromPart ); + splittable.append( fromPart ); + } + } + for ( CondInList::Iter t = state->inCond; t.lte(); t++ ) { + MinPartition *fromPart = t->fromState->alg.partition; + if ( ! fromPart->active ) { + fromPart->active = true; + partList.detach( fromPart ); + splittable.append( fromPart ); + } + } + } + + newPart += 1; + causalPart = &parts[newPart]; + } + } + return numParts; +} + + +/** + * \brief Minimize by partitioning version 2 (best alg). + * + * Repeatedly tries to split partitions that may be splittable until there are + * no more partitions that might possibly need splitting. Runs faster than + * version 1. Produces the most minimal fsm possible. + * + * Steps: sort the states with the initial-partition merge sort, put runs of + * states that compare equal into the same initial partition, refine the + * partitions with splitCandidates(), then fuse every partition into a single + * state with fusePartitions(). + */ +void FsmAp::minimizePartition2() +{ + /* Need a mergesort and an initial partition compare. */ + MergeSortInitPartition mergeSort( ctx ); + InitPartitionCompare initPartCompare( ctx ); + + /* Nothing to do if there are no states.
*/ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. This gives us an initial partitioning to work + * with. + */ + + /* Make an array of pointers to states. */ + int numStates = stateList.length(); + StateAp** statePtrs = new StateAp*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. There can + * never be more partitions than states, hence numStates entries. */ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. */ + stateList.abandon(); + + /* Split partitions. destPart is the index of the last partition used, so + * destPart+1 partitions exist at this point. */ + int numParts = splitCandidates( statePtrs, parts, destPart+1 ); + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + +void FsmAp::initialMarkRound( MarkIndex &markIndex ) +{ + /* P and q for walking pairs. */ + StateAp *p = stateList.head, *q; + + /* Need an initial partition compare. */ + InitPartitionCompare initPartCompare( ctx ); + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p.
This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* If the states differ on final state status, out transitions or + * any transition data then they should be separated on the initial + * round. */ + if ( initPartCompare.compare( p, q ) != 0 ) + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + + q = q->next; + } + p = p->next; + } +} + +#ifdef TO_UPGRADE_CONDS +/* One marking pass over all unordered pairs of states. Every pair that is not yet marked is tested with MarkCompare::shouldMark and marked if the test says so. Returns true if at least one pair was newly marked. */ bool FsmAp::markRound( MarkIndex &markIndex ) +{ + /* P and q for walking pairs. Take note if any pair gets marked. */ + StateAp *p = stateList.head, *q; + bool pairWasMarked = false; + + /* Need a mark comparison. */ + MarkCompare markCompare( ctx ); + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* Should we mark the pair? */ + if ( !markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + if ( markCompare.shouldMark( markIndex, p, q ) ) { + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + pairWasMarked = true; + } + } + q = q->next; + } + p = p->next; + } + + return pairWasMarked; +} +#endif + +#ifdef TO_UPGRADE_CONDS +/** + * \brief Minimize by pair marking. + * + * Decides if each pair of states is distinct or not. Uses O(n^2) memory and + * should only be used on small graphs. Produces the most minimal FSM + * possible. + */ +void FsmAp::minimizeStable() +{ + /* Set the state numbers. */ + setStateNumbers( 0 ); + + /* This keeps track of which pairs have been marked. */ + MarkIndex markIndex( stateList.length() ); + + /* Mark pairs where final stateness, out trans, or trans data differ. */ + initialMarkRound( markIndex ); + + /* While the last round of marking succeeded in marking a state + * continue to do another round.
*/ + /* NOTE(review): markRound() is declared to return bool; a bool local would match it. */ int modified = markRound( markIndex ); + while (modified) + modified = markRound( markIndex ); + + /* Merge pairs that are unmarked. */ + fuseUnmarkedPairs( markIndex ); +} +#endif + +#ifdef TO_UPGRADE_CONDS +/* One round of approximate minimization: sort the states with the approximate compare and fuse neighbours that compare equal. Returns true if any states were fused. */ bool FsmAp::minimizeRound() +{ + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return false; + + /* Need a mergesort on approx compare and an approx compare. */ + MergeSortApprox mergeSort( ctx ); + ApproxCompare approxCompare( ctx ); + + /* Fill up an array of pointers to the states. */ + StateAp **statePtrs = new StateAp*[stateList.length()]; + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + bool modified = false; + + /* Sort the list. */ + mergeSort.sort( statePtrs, stateList.length() ); + + /* Walk the list looking for duplicates next to each other, + * merge in any duplicates. + * + * NOTE(review): the loop bound is stateList.length(), which shrinks every + * time fuseEquivStates() removes a state from the main list, so the tail + * of statePtrs may not be visited in this round. minimizeApproximate() + * calls this function again until nothing changes -- confirm that the + * early stop is intended. */ + StateAp **pLast = statePtrs; + StateAp **pState = statePtrs + 1; + for ( int i = 1; i < stateList.length(); i++, pState++ ) { + if ( approxCompare.compare( *pLast, *pState ) == 0 ) { + /* Last and pState are the same, so fuse together. Move forward + * with pState but not with pLast. If any more are identical, + * they are compared against and fused into the same pLast. */ + fuseEquivStates( *pLast, *pState ); + modified = true; + } + else { + /* Last and this are different, do not set to merge them. Move + * pLast to the current (it may be way behind from merging many + * states) and pState forward one to consider the next pair. */ + pLast = pState; + } + } + delete[] statePtrs; + return modified; +} +#endif + +#ifdef TO_UPGRADE_CONDS +/** + * \brief Minimize by an approximation. + * + * Repeatedly tries to find states with transitions out to the same set of + * states on the same set of keys until no more identical states can be found. + * Does not produce the most minimal FSM possible.
+ */ +void FsmAp::minimizeApproximate() +{ + /* While the last minimization round succeeded in compacting states, + * continue to try to compact states. */ + while ( true ) { + bool modified = minimizeRound(); + if ( ! modified ) + break; + } +} +#endif + + +/* Remove states that have no path to them from the start state or from any + * entry point. Recursively traverses the graph marking states that have paths + * into them. Then removes all states that did not get marked. Returns the + * number of states that were removed. */ +long FsmAp::removeUnreachableStates() +{ + long origLen = stateList.length(); + + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all the states that can be reached + * through the existing set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + StateAp *state = stateList.head; + while ( state ) { + /* Fetch the successor first, state may be deleted below. */ StateAp *next = state->next; + + if ( state->stateBits & STB_ISMARKED ) + state->stateBits &= ~ STB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } + + return origLen - stateList.length(); +} +/* Returns true if the out transitions of state cover the whole key range + * without gaps: the first one starts at minKey, each following one starts + * right after the previous one ends, and the last one ends at maxKey. */ +bool FsmAp::outListCovers( StateAp *state ) +{ + /* Must be at least one range to cover. */ + if ( state->outList.length() == 0 ) + return false; + + /* The first must start at the lower bound. */ + TransList::Iter trans = state->outList.first(); + if ( ctx->keyOps->lt( ctx->keyOps->minKey, trans->lowKey ) ) + return false; + + /* Loop starts at second el. */ + trans.increment(); + + /* Loop checks lower against prev upper. */ + for ( ; trans.lte(); trans++ ) { + /* Lower end of the trans must be one greater than the + * previous' high end.
*/ + Key lowKey = trans->lowKey; + ctx->keyOps->decrement( lowKey ); + if ( ctx->keyOps->lt( trans->prev->highKey, lowKey ) ) + return false; + } + + /* Require that the last range extends to the upper bound. */ + trans = state->outList.last(); + if ( ctx->keyOps->lt( trans->highKey, ctx->keyOps->maxKey ) ) + return false; + + return true; +} + +/* Remove states that do not lead to a final state. Works by recursively + * traversing the graph in reverse (starting from all final states) and marking + * seen states. Then removes states that did not get marked. The start state is + * always kept. */ +void FsmAp::removeDeadEndStates() +{ + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all states that have paths to the final states. */ + StateAp **st = finStateSet.data; + int nst = finStateSet.length(); + for ( int i = 0; i < nst; i++, st++ ) + markReachableFromHereReverse( *st ); + + /* Start state gets honorary marking. If the machine accepts nothing we + * still want the start state to hang around. This must be done after the + * recursive call on all the final states so that it does not cause the + * start state in transitions to be skipped when the start state is + * visited by the traversal. */ + startState->stateBits |= STB_ISMARKED; + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + StateAp *state = stateList.head; + while ( state != 0 ) { + /* Fetch the successor first, state may be deleted below. */ StateAp *next = state->next; + + if ( state->stateBits & STB_ISMARKED ) + state->stateBits &= ~ STB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} + +/* Remove states on the misfit list. To work properly misfit accounting should + * be on when this is called. The detaching of a state will likely cause + * another misfit to be collected and it can then be removed.
*/ +void FsmAp::removeMisfits() +{ + while ( misfitList.length() > 0 ) { + /* Get the first state. */ + StateAp *state = misfitList.head; + + /* Detach and delete. */ + detachState( state ); + + /* The state was previously on the misfit list and detaching can only + * remove in transitions so the state must still be on the misfit + * list. */ + misfitList.detach( state ); + delete state; + } +} + +/* Fuse src into dest because they have been deemed equivalent states. + * The transitions that go into src are redirected to go into dest + * (moveInwardTrans). Src is then detached from the graph, taken off the main + * state list and deleted. */ +void FsmAp::fuseEquivStates( StateAp *dest, StateAp *src ) +{ + /* This would get ugly. */ + assert( dest != src ); + + /* Src is a duplicate of dest. Point its inward transitions at dest. */ + moveInwardTrans( dest, src ); + + detachState( src ); + stateList.detach( src ); + delete src; +} +/* Fuse every state into the first earlier state on the main list whose pair + * with it is not marked in markIndex. */ +void FsmAp::fuseUnmarkedPairs( MarkIndex &markIndex ) +{ + StateAp *p = stateList.head, *nextP, *q; + + /* Definition: The primary state of an equivalence class is the first state + * encountered that belongs to the equivalence class. All equivalence + * classes have a primary state, including equivalence classes with only + * one state in them. */ + + /* For each unmarked pair merge p into q and delete p. q is always the + * primary state of its equivalence class. We wouldn't have landed on it + * here if it were not, because it would have been deleted. + * + * Proof that q is the primary state of its equivalence class: Assume q + * is not the primary state of its equivalence class, then it would be + * merged into some state that came before it and thus p would be + * equivalent to that state. But q is the first state that p is equivalent + * to so we have a contradiction. */ + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q.
*/ + while ( p != 0 ) { + /* Save the successor now, p is deleted if it gets fused. */ nextP = p->next; + + q = stateList.head; + while ( q != p ) { + /* An unmarked pair means p and q were never found to differ: + * fuse p into q and stop looking for a partner for p. */ + if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + fuseEquivStates( q, p ); + break; + } + q = q->next; + } + p = nextP; + } +} +/* Collapse each of the numParts partitions in parts into a single state and + * put the surviving states back onto the main state list. */ +void FsmAp::fusePartitions( MinPartition *parts, int numParts ) +{ + /* For each partition, fuse state 2, 3, ... into state 1. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume that there will always be at least one state. */ + StateAp *first = parts[p].list.head, *toFuse = first->next; + + /* Put the first state back onto the main state list. Don't bother + * removing it from the partition list first. */ + stateList.append( first ); + + /* Fuse the rest of the states into the first. */ + while ( toFuse != 0 ) { + /* Save the next. We will trash it before it is needed. */ + StateAp *next = toFuse->next; + + /* Put the state that is about to be fused into the first back + * onto the main list before it is fused. The state needs to be on + * the main list for the detach from the graph to work. Don't + * bother removing the state from the partition list first. We + * need not maintain it. */ + stateList.append( toFuse ); + + /* Now fuse to the first. */ + fuseEquivStates( first, toFuse ); + + /* Go to the next that we saved before trashing the next pointer. */ + toFuse = next; + } + + /* We transferred the states from the partition list into the main list without + * removing the states from the partition list first. Clean it up. */ + parts[p].list.abandon(); + } +} + +/* Merge neighboring transitions that go to the same state and have the same + * transitions data.
*/ +void FsmAp::compressTransitions() +{ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outList.length() > 1 ) { + for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) { + /* nextLow is the key just below the start of next. The two + * ranges are adjacent when it equals trans's high key. */ Key nextLow = next->lowKey; + ctx->keyOps->decrement( nextLow ); + + /* Require there be no conditions in either of the merge + * candidates. */ + bool merge = false; + TransDataAp *td; + TransDataAp *tn; + + if ( trans->plain() && + next->plain() && + ctx->keyOps->eq( trans->highKey, nextLow ) ) + { + td = trans->tdap(); + tn = next->tdap(); + + /* Check the target state and action data. */ + if ( td->toState == tn->toState && CmpActionTable::compare( + td->actionTable, tn->actionTable ) == 0 ) + { + merge = true; + } + } + + if ( merge ) { + /* Extend trans over next's range and drop next. tn is only + * read here, where the branch above has assigned it. */ trans->highKey = next->highKey; + st->outList.detach( tn ); + detachTrans( tn->fromState, tn->toState, tn ); + delete tn; + /* trans absorbed the range, look at its new successor. */ next = trans.next(); + } + else { + trans.increment(); + next.increment(); + } + } + } + } +} +/* Try to drop condition bits that do not influence the outcome. For every + * non-plain out transition and every bit of its cond space, the cond list is + * split into the entries with the bit set ("on", stored with the bit cleared) + * and those without ("off"). If both halves have the same length and + * compareCondListBitElim() reports them equal, the on half is detached and the + * bit is removed from the cond space, or the transition is converted to a + * plain one when it was the only condition. Scanning of the state restarts + * after every change. Returns true if anything was modified. */ +bool FsmAp::elimCondBits() +{ + bool modified = false; + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + restart: + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( !trans->plain() ) { + CondSpace *cs = trans->condSpace; + + for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) { + /* Build the mask in long so the shift is not done in int before being widened. */ long bit = 1L << csi.pos(); + + /* Sort into on and off lists. */ + CondList on; + CondList off; + TransCondAp *tcap = trans->tcap(); + while ( tcap->condList.length() > 0 ) { + CondAp *cond = tcap->condList.detachFirst(); + if ( cond->key.getVal() & bit ) { + cond->key = CondKey( cond->key.getVal() & ~bit ); + on.append( cond ); + } + else { + off.append( cond ); + } + } + + bool merge = false; + if ( on.length() > 0 && on.length() == off.length() ) { + /* test if the same */ + int cmpRes = compareCondListBitElim( on, off ); + if ( cmpRes == 0 ) + merge = true; + } + + if ( merge ) { + if ( cs->condSet.length() == 1 ) { + /* clear out the on-list.
*/ + while ( on.length() > 0 ) { + CondAp *cond = on.detachFirst(); + detachTrans( st, cond->toState, cond ); + } + + /* turn back into a plain transition. NOTE(review): neither + * the conds detached from the on-list above nor the trans + * taken off the out list below are deleted in this + * function -- confirm they are released elsewhere. */ + CondAp *cond = off.detachFirst(); + TransAp *n = convertToTransAp( st, cond ); + TransAp *before = trans->prev; + st->outList.detach( trans ); + st->outList.addAfter( before, n ); + modified = true; + goto restart; + } + else + { + CondSet newSet = cs->condSet; + newSet.Vector<Action*>::remove( csi.pos(), 1 ); + trans->condSpace = addCondSpace( newSet ); + + /* clear out the on-list. */ + while ( on.length() > 0 ) { + CondAp *cond = on.detachFirst(); + detachTrans( st, cond->toState, cond ); + } + } + } + + /* Turn back into a single list. Keys from the on-list get the + * bit put back; when both lists are non-empty the entry with + * the smaller key is appended first. */ + while ( on.length() > 0 || off.length() > 0 ) { + if ( on.length() == 0 ) { + while ( off.length() > 0 ) + tcap->condList.append( off.detachFirst() ); + } + else if ( off.length() == 0 ) { + while ( on.length() > 0 ) { + CondAp *cond = on.detachFirst(); + cond->key = CondKey( cond->key.getVal() | bit ); + tcap->condList.append( cond ); + } + } + else { + if ( off.head->key.getVal() < ( on.head->key.getVal() | bit ) ) { + tcap->condList.append( off.detachFirst() ); + } + else { + CondAp *cond = on.detachFirst(); + cond->key = CondKey( cond->key.getVal() | bit ); + tcap->condList.append( cond ); + } + } + } + + if ( merge ) { + modified = true; + goto restart; + } + } + } + } + } + return modified; +} + +/* Perform minimization after an operation according + * to the command line args. */ +void FsmAp::afterOpMinimize( bool lastInSeq ) +{ + /* Switch on the preferred minimization algorithm. */ + if ( ctx->minimizeOpt == MinimizeEveryOp || ( ctx->minimizeOpt == MinimizeMostOps && lastInSeq ) ) { + /* First clean up the graph. FsmAp operations may leave these + * lying around. There should be no dead end states. The subtract + * intersection operators are the only places where they may be + * created and those operators clean them up.
*/ + removeUnreachableStates(); + + switch ( ctx->minimizeLevel ) { + #ifdef TO_UPGRADE_CONDS + case MinimizeApprox: + minimizeApproximate(); + break; + #endif + case MinimizePartition1: + minimizePartition1(); + break; + case MinimizePartition2: + minimizePartition2(); + break; + #ifdef TO_UPGRADE_CONDS + case MinimizeStable: + minimizeStable(); + break; + #endif + } + } +} + diff --git a/libfsm/fsmnfa.cc b/libfsm/fsmnfa.cc new file mode 100644 index 00000000..cde4f82d --- /dev/null +++ b/libfsm/fsmnfa.cc @@ -0,0 +1,660 @@ +/* + * Copyright 2015-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <iostream> + +#include "fsmgraph.h" +#include "mergesort.h" +#include "parsedata.h" + +using std::endl; + +void FsmAp::nfaFillInStates() +{ + long count = nfaList.length(); + + /* Can this lead to too many DFAs? 
Since the nfa merge is removing misfits, + * it is possible we remove a state that is on the nfa list, but we don't + * adjust count. */ + + /* Merge any states that are awaiting merging. This will likely cause + * other states to be added to the fill list (presumably nfaList here -- + * TODO confirm). Only as many states as were on the list at entry are + * processed, see count. */ + while ( nfaList.length() > 0 && count-- ) { + StateAp *state = nfaList.head; + + StateSet *stateSet = &state->stateDictEl->stateSet; + nfaMergeStates( state, stateSet->data, stateSet->length() ); + + for ( StateSet::Iter s = *stateSet; s.lte(); s++ ) + detachStateDict( state, *s ); + + nfaList.detach( state ); + } +} +/* For every state that has NFA out transitions and is not flagged + * STB_NFA_REP: collect the targets of those transitions into a state set, + * store the set in a new StateDictEl that is registered in stateDict, delete + * the nfaOut list and queue the state on nfaList. */ +void FsmAp::prepareNfaRound() +{ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->nfaOut != 0 && ! (st->stateBits & STB_NFA_REP) ) { + StateSet set; + for ( NfaTransList::Iter to = *st->nfaOut; to.lte(); to++ ) + set.insert( to->toState ); + + st->stateDictEl = new StateDictEl( set ); + st->stateDictEl->targState = st; + stateDict.insert( st->stateDictEl ); + delete st->nfaOut; + st->nfaOut = 0; + + nfaList.append( st ); + } + } +} +/* Close an NFA round: empty the state dict and turn the state sets of the + * states still waiting on nfaList back into NFA out transitions. */ +void FsmAp::finalizeNfaRound() +{ + /* For any remaining NFA states, remove from the state dict. We need to + * keep the state sets. */ + for ( NfaStateList::Iter ns = nfaList; ns.lte(); ns++ ) + stateDict.detach( ns->stateDictEl ); + + /* Disassociate non-nfa states from their state dicts. */ + for ( StateDict::Iter sdi = stateDict; sdi.lte(); sdi++ ) + sdi->targState->stateDictEl = 0; + + /* Delete the state dict elements for non-nfa states. */ + stateDict.empty(); + + /* Transfer remaining stateDictEl sets to nfaOut. */ + while ( nfaList.length() > 0 ) { + StateAp *state = nfaList.head; + state->nfaOut = new NfaTransList; + for ( StateSet::Iter ss = state->stateDictEl->stateSet; ss.lte(); ss++ ) { + /* Attach it using the NFA transitions data structure (propagates + * to output).
*/ + NfaTrans *trans = new NfaTrans( /* 0, 0, */ 1 ); + state->nfaOut->append( trans ); + attachToNfa( state, *ss, trans ); + + detachStateDict( state, *ss ); + } + delete state->stateDictEl; + state->stateDictEl = 0; + nfaList.detach( state ); + } +} +/* Merge each of the numSrc states in srcStates into destState. After every + * single merge all states on the misfit list are detached and deleted. */ +void FsmAp::nfaMergeStates( StateAp *destState, + StateAp **srcStates, int numSrc ) +{ + for ( int s = 0; s < numSrc; s++ ) { + mergeStates( destState, srcStates[s] ); + + while ( misfitList.length() > 0 ) { + StateAp *state = misfitList.head; + + /* Detach and delete. */ + detachState( state ); + misfitList.detach( state ); + delete state; + } + } +} + + +/* + * WRT action ordering. + * + * All the pop restore actions get the ordering ORD_RESTORE, the lowest value + * defined here (-3), to cause them to always execute first. This is the + * action that restores the state and we need that to happen before any user + * actions. ORD_TEST (2^30) is the highest value defined here. + */ +const int ORD_PUSH = 0; +const int ORD_RESTORE = -3; +const int ORD_COND = -1; +const int ORD_COND2 = -2; +const int ORD_TEST = 1073741824; +/* Copy the from-state actions, out condition space and keys, out priorities + * and out actions of state onto the pop fields of the NFA transition trans. */ +void FsmAp::transferOutToNfaTrans( NfaTrans *trans, StateAp *state ) +{ + trans->popFrom = state->fromStateActionTable; + trans->popCondSpace = state->outCondSpace; + trans->popCondKeys = state->outCondKeys; + trans->priorTable.setPriors( state->outPriorTable ); + trans->popAction.setActions( state->outActionTable ); +} + +FsmRes FsmAp::nfaWrap( FsmAp *fsm, Action *push, Action *pop, Action *init, + Action *stay, Action *exit, NfaRepeatMode mode ) +{ + /* + * First Concat. + */ + StateSet origFinals = fsm->finStateSet; + + /* Get the orig start state. */ + StateAp *origStartState = fsm->startState; + + /* New start state. */ + StateAp *newStart = fsm->addState(); + + newStart->nfaOut = new NfaTransList; + + const int orderInit = 0; + const int orderStay = mode == NfaGreedy ? 3 : 1; + const int orderExit = mode == NfaGreedy ? 1 : 3; + + NfaTrans *trans; + if ( init ) { + /* Transition into the repetition.
Doesn't make much sense to flip this + * statically false, but provided for consistency of interface. Allows + * an init so we can have only local state manipulation. */ + trans = new NfaTrans( orderInit ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, init ); + + newStart->nfaOut->append( trans ); + fsm->attachToNfa( newStart, origStartState, trans ); + } + /* NOTE(review): the transition above is the only link from newStart into the original machine, so a null init appears to leave newStart without any outgoing NFA transition -- confirm callers always pass init. */ + StateAp *newFinal = fsm->addState(); + + for ( StateSet::Iter orig = origFinals; orig.lte(); orig++ ) { + /* For every original final state we place a new state (repl) in + * front of it: the transitions into the original are moved to repl, + * which then gets an NFA transition to the original (the "stay" + * choice) and one to newFinal (the "exit" choice). */ + StateAp *repl = fsm->addState(); + fsm->moveInwardTrans( repl, *orig ); + + repl->nfaOut = new NfaTransList; + + if ( stay != 0 ) { + /* Transition to original final state. Represents staying. */ + trans = new NfaTrans( orderStay ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, stay ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, *orig, trans ); + } + + if ( exit != 0 ) { + /* Transition to the new final. Represents exiting. */ + trans = new NfaTrans( orderExit ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, exit ); + + fsm->transferOutToNfaTrans( trans, *orig ); + repl->fromStateActionTable.setActions( (*orig)->fromStateActionTable ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, newFinal, trans ); + } + + /* The original is no longer final, newFinal takes over below. */ fsm->unsetFinState( *orig ); + } + + fsm->unsetStartState(); + fsm->setStartState( newStart ); + fsm->setFinState( newFinal ); + + return FsmRes( FsmRes::Fsm(), fsm ); +} + + +FsmRes FsmAp::nfaRepeatOp2( FsmAp *fsm, Action *push, Action *pop, Action *init, + Action *stay, Action *repeat, Action *exit, NfaRepeatMode mode ) +{ + /* + * First Concat.
+ */ + StateSet origFinals = fsm->finStateSet; + + /* Get the orig start state. */ + StateAp *origStartState = fsm->startState; + StateAp *repStartState = fsm->dupStartState(); + + /* New start states. newStart1 becomes the start state of the result; + * newStart2 is reached from it by the init transition. */ + StateAp *newStart1 = fsm->addState(); + StateAp *newStart2 = fsm->addState(); + + newStart1->nfaOut = new NfaTransList; + newStart2->nfaOut = new NfaTransList; + + const int orderInit = 0; + const int orderStay = mode == NfaGreedy ? 3 : 1; + /* NOTE(review): both arms are 2, so the mode has no effect on this value. */ const int orderRepeat = mode == NfaGreedy ? 2 : 2; + const int orderExit = mode == NfaGreedy ? 1 : 3; + + NfaTrans *trans; + if ( init ) { + /* Transition into the repetition. Doesn't make much sense to flip this + * statically false, but provided for consistency of interface. Allows + * an init so we can have only local state manipulation. */ + trans = new NfaTrans( orderInit ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, init ); + + newStart1->nfaOut->append( trans ); + fsm->attachToNfa( newStart1, newStart2, trans ); + } + + StateAp *newFinal = fsm->addState(); + + if ( exit ) { + trans = new NfaTrans( orderExit ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, exit ); + + /* NOTE(review): the transition is stored in newStart2's nfaOut list but attached with newStart1 as its from state. Every other append/attach pair in this file uses the same state for both -- confirm whether newStart2 was intended in the attach call. */ newStart2->nfaOut->append( trans ); + fsm->attachToNfa( newStart1, newFinal, trans ); + } + + if ( repeat ) { + trans = new NfaTrans( orderRepeat ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, repeat ); + + /* NOTE(review): same newStart2 / newStart1 mismatch as in the exit case above -- confirm. */ newStart2->nfaOut->append( trans ); + fsm->attachToNfa( newStart1, origStartState, trans ); + } + + for ( StateSet::Iter orig = origFinals; orig.lte(); orig++ ) { + /* For every original final state we place a new state (repl) in + * front of it, with an NFA transition to the original. This is the + * "stay" choice. The "repeat" and "exit" choices follow.
*/ + StateAp *repl = fsm->addState(); + fsm->moveInwardTrans( repl, *orig ); + + repl->nfaOut = new NfaTransList; + + if ( stay != 0 ) { + /* Transition to original final state. Represents staying. */ + trans = new NfaTrans( orderStay ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, stay ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, *orig, trans ); + } + + /* Transition back to the duplicated start state. Represents repeat. */ + if ( repeat != 0 ) { + trans = new NfaTrans( orderRepeat ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, repeat ); + + fsm->transferOutToNfaTrans( trans, *orig ); + repl->fromStateActionTable.setActions( (*orig)->fromStateActionTable ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, repStartState, trans ); + } + + if ( exit != 0 ) { + /* Transition to the new final. Represents exiting. */ + trans = new NfaTrans( orderExit ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, exit ); + + fsm->transferOutToNfaTrans( trans, *orig ); + repl->fromStateActionTable.setActions( (*orig)->fromStateActionTable ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, newFinal, trans ); + } + + fsm->unsetFinState( *orig ); + } + + fsm->unsetStartState(); + fsm->setStartState( newStart1 ); + fsm->setFinState( newFinal ); + + return FsmRes( FsmRes::Fsm(), fsm ); +} + + +/* This version contains the init, increment and test in the nfa pop actions. + * This is a compositional operator since it doesn't leave any actions to + * trailing characters, where they may interact with other actions that use the + * same variables.
*/ +FsmRes FsmAp::nfaRepeatOp( FsmAp *fsm, Action *push, Action *pop, Action *init, + Action *stay, Action *repeat, Action *exit ) +{ + /* + * First Concat. + */ + StateSet origFinals = fsm->finStateSet; + + /* Get the orig start state. */ + StateAp *origStartState = fsm->startState; + StateAp *repStartState = fsm->dupStartState(); + + /* New start state. */ + StateAp *newStart = fsm->addState(); + + newStart->nfaOut = new NfaTransList; + + NfaTrans *trans; + if ( init ) { + /* Transition into the repetition. Doesn't make much sense to flip this + * statically false, but provided for consistency of interface. Allows + * an init so we can have only local state manipulation. + * + * NOTE(review): the init transition gets order 1 here, while nfaWrap + * and nfaRepeatOp2 use 0 (orderInit) -- confirm the difference is + * intended. */ + trans = new NfaTrans( 1 ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, init ); + + newStart->nfaOut->append( trans ); + fsm->attachToNfa( newStart, origStartState, trans ); + } + + StateAp *newFinal = fsm->addState(); + + for ( StateSet::Iter orig = origFinals; orig.lte(); orig++ ) { + /* For every original final state we place a new state (repl) in + * front of it, with an NFA transition to the original. This is the + * "stay" choice. The "repeat" and "exit" choices follow. */ + StateAp *repl = fsm->addState(); + fsm->moveInwardTrans( repl, *orig ); + + repl->nfaOut = new NfaTransList; + + /* Fixed orders; there is no mode parameter in this version. */ const int orderStay = 3; + const int orderRepeat = 2; + const int orderExit = 1; + + if ( stay != 0 ) { + /* Transition to original final state. Represents staying. */ + trans = new NfaTrans( orderStay ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, stay ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, *orig, trans ); + } + + /* Transition back to the start. Represents repeat.
*/ + if ( repeat != 0 ) { + trans = new NfaTrans( orderRepeat ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, repeat ); + + fsm->transferOutToNfaTrans( trans, *orig ); + repl->fromStateActionTable.setActions( (*orig)->fromStateActionTable ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, repStartState, trans ); + } + + if ( exit != 0 ) { + /* Transition to the new final. Represents exiting. */ + trans = new NfaTrans( orderExit ); + + trans->pushTable.setAction( ORD_PUSH, push ); + trans->restoreTable.setAction( ORD_RESTORE, pop ); + trans->popTest.setAction( ORD_TEST, exit ); + + fsm->transferOutToNfaTrans( trans, *orig ); + repl->fromStateActionTable.setActions( (*orig)->fromStateActionTable ); + + repl->nfaOut->append( trans ); + fsm->attachToNfa( repl, newFinal, trans ); + } + + fsm->unsetFinState( *orig ); + } + + fsm->unsetStartState(); + fsm->setStartState( newStart ); + fsm->setFinState( newFinal ); + + return FsmRes( FsmRes::Fsm(), fsm ); +} + + +/* Unions the n machines in others with fsm. The others are deleted and the + * result is returned in fsm. */ +FsmRes FsmAp::nfaUnionOp( FsmAp *fsm, FsmAp **others, int n, int depth, ostream &stats ) +{ + /* Mark existing NFA states as NFA_REP states, which excludes them from the + * prepare NFA round. We must treat them as final NFA states and not try to + * make them deterministic. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + if ( st->nfaOut != 0 ) + st->stateBits |= STB_NFA_REP; + } + + for ( int o = 0; o < n; o++ ) { + for ( StateList::Iter st = others[o]->stateList; st.lte(); st++ ) { + if ( st->nfaOut != 0 ) + st->stateBits |= STB_NFA_REP; + } + } + + /* All machines must share the same context. */ for ( int o = 0; o < n; o++ ) + assert( fsm->ctx == others[o]->ctx ); + + /* Not doing misfit accounting here. If we wanted to, it would need to be + * made nfa-compatibile.
*/ + + /* Build a state set consisting of both start states */ + StateSet startStateSet; + startStateSet.insert( fsm->startState ); + for ( int o = 0; o < n; o++ ) + startStateSet.insert( others[o]->startState ); + + /* Both of the original start states loose their start state status. */ + fsm->unsetStartState(); + for ( int o = 0; o < n; o++ ) + others[o]->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + for ( int o = 0; o < n; o++ ) { + fsm->copyInEntryPoints( others[o] ); + others[o]->entryPoints.empty(); + } + + for ( int o = 0; o < n; o++ ) { + /* Merge the lists. This will move all the states from other + * into this. No states will be deleted. */ + fsm->stateList.append( others[o]->stateList ); + fsm->misfitList.append( others[o]->misfitList ); + // nfaList.append( others[o]->nfaList ); + } + + for ( int o = 0; o < n; o++ ) { + /* Move the final set data from other into this. */ + fsm->finStateSet.insert( others[o]->finStateSet ); + others[o]->finStateSet.empty(); + } + + for ( int o = 0; o < n; o++ ) { + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[o]; + } + + /* Create a new start state. */ + fsm->setStartState( fsm->addState() ); + + if ( depth == 0 ) { + fsm->startState->stateDictEl = new StateDictEl( startStateSet ); + fsm->nfaList.append( fsm->startState ); + + for ( StateSet::Iter s = startStateSet; s.lte(); s++ ) { + NfaTrans *trans = new NfaTrans( /* 0, 0, */ 0 ); + + if ( fsm->startState->nfaOut == 0 ) + fsm->startState->nfaOut = new NfaTransList; + + fsm->startState->nfaOut->append( trans ); + fsm->attachToNfa( fsm->startState, *s, trans ); + } + } + else { + /* Merge the start states. 
*/ + if ( fsm->ctx->printStatistics ) + stats << "nfa-fill-round\t0" << endl; + + fsm->nfaMergeStates( fsm->startState, startStateSet.data, startStateSet.length() ); + + long removed = fsm->removeUnreachableStates(); + if ( fsm->ctx->printStatistics ) + stats << "round-unreach\t" << removed << endl; + + /* Fill in any new states made from merging. */ + for ( long i = 1; i < depth; i++ ) { + if ( fsm->ctx->printStatistics ) + stats << "nfa-fill-round\t" << i << endl; + + if ( fsm->nfaList.length() == 0 ) + break; + + fsm->nfaFillInStates( ); + + long removed = fsm->removeUnreachableStates(); + if ( fsm->ctx->printStatistics ) + stats << "round-unreach\t" << removed << endl; + } + + fsm->finalizeNfaRound(); + + long maxStateSetSize = 0; + long count = 0; + for ( StateList::Iter s = fsm->stateList; s.lte(); s++ ) { + if ( s->nfaOut != 0 && s->nfaOut->length() > 0 ) { + count += 1; + if ( s->nfaOut->length() > maxStateSetSize ) + maxStateSetSize = s->nfaOut->length(); + } + } + + if ( fsm->ctx->printStatistics ) { + stats << "fill-list\t" << count << endl; + stats << "state-dict\t" << fsm->stateDict.length() << endl; + stats << "states\t" << fsm->stateList.length() << endl; + stats << "max-ss\t" << maxStateSetSize << endl; + } + + fsm->removeUnreachableStates(); + + if ( fsm->ctx->printStatistics ) + stats << "post-unreachable\t" << fsm->stateList.length() << endl; + + fsm->minimizePartition2(); + + if ( fsm->ctx->printStatistics ) { + stats << "post-min\t" << fsm->stateList.length() << std::endl; + stats << std::endl; + } + } + + return FsmRes( FsmRes::Fsm(), fsm ); +} + +FsmRes FsmAp::nfaUnion( const NfaRoundVect &roundsList, + FsmAp **machines, int numMachines, + std::ostream &stats, bool printStatistics ) +{ + long sumPlain = 0, sumMin = 0; + for ( int i = 0; i < numMachines; i++ ) { + sumPlain += machines[i]->stateList.length(); + + machines[i]->removeUnreachableStates(); + machines[i]->minimizePartition2(); + + sumMin += machines[i]->stateList.length(); + } + + 
if ( printStatistics ) { + stats << "sum-plain\t" << sumPlain << endl; + stats << "sum-minimized\t" << sumMin << endl; + } + + /* For each round. */ + for ( NfaRoundVect::Iter r = roundsList; r.lte(); r++ ) { + + if ( printStatistics ) { + stats << "depth\t" << r->depth << endl; + stats << "grouping\t" << r->groups << endl; + } + + int numGroups = 0; + int start = 0; + while ( start < numMachines ) { + /* If nfa-group-max is zero, don't group, put all terms into a single + * n-depth NFA. */ + int amount = r->groups == 0 ? numMachines : r->groups; + if ( ( start + amount ) > numMachines ) + amount = numMachines - start; + + FsmAp **others = machines + start + 1; + FsmRes res = FsmAp::nfaUnionOp( machines[start], others, (amount - 1), r->depth, stats ); + machines[start] = res.fsm; + + start += amount; + numGroups++; + } + + if ( numGroups == 1 ) + break; + + /* Move the group starts into the groups array. */ + FsmAp **groups = new FsmAp*[numGroups]; + int g = 0; + start = 0; + while ( start < numMachines ) { + groups[g] = machines[start]; + start += r->groups == 0 ? 
numMachines : r->groups; + g++; + } + + delete[] machines; + machines = groups; + numMachines = numGroups; + } + + FsmAp *ret = machines[0]; + return FsmRes( FsmRes::Fsm(), ret ); +} diff --git a/libfsm/fsmstate.cc b/libfsm/fsmstate.cc new file mode 100644 index 00000000..03a4df34 --- /dev/null +++ b/libfsm/fsmstate.cc @@ -0,0 +1,603 @@ +/* + * Copyright 2002-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "fsmgraph.h" + +#include <string.h> +#include <assert.h> +#include <iostream> + +/* Construct a mark index for a specified number of states. Must new up + * an array that is states^2 in size. */ +MarkIndex::MarkIndex( int states ) : numStates(states) +{ + /* Total pairs is states^2. Actually only use half of these, but we allocate + * them all to make indexing into the array easier. 
*/ + int total = states * states; + + /* New up chars so that individual DListEl constructors are + * not called. Zero out the mem manually. */ + array = new bool[total]; + memset( array, 0, sizeof(bool) * total ); +} + +/* Free the array used to store state pairs. */ +MarkIndex::~MarkIndex() +{ + delete[] array; +} + +/* Mark a pair of states. States are specified by their number. The + * marked states are moved from the unmarked list to the marked list. */ +void MarkIndex::markPair(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + array[pos] = true; +} + +/* Returns true if the pair of states are marked. Returns false otherwise. + * Ordering of states given does not matter. */ +bool MarkIndex::isPairMarked(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + return array[pos]; +} + +/* Create a new fsm state. State has not out transitions or in transitions, not + * out out transition data and not number. */ +StateAp::StateAp() +: + /* No out or in transitions. */ + outList(), + inTrans(), + inCond(), + + /* No EOF target. */ + eofTarget(0), + + /* No entry points, or epsilon trans. */ + entryIds(), + epsilonTrans(), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* Only used during merging. Normally null. */ + stateDictEl(0), + stateDictIn(0), + + nfaOut(0), + nfaIn(0), + + eptVect(0), + + /* No state identification bits. */ + stateBits(0), + + /* No Priority data. */ + outPriorTable(), + + /* No Action data. */ + toStateActionTable(), + fromStateActionTable(), + outActionTable(), + outCondSpace(0), + outCondKeys(), + errActionTable(), + eofActionTable(), + guardedInTable(), + lmNfaParts() +{ +} + +/* Copy everything except actual the transitions. That is left up to the + * FsmAp copy constructor. */ +StateAp::StateAp(const StateAp &other) +: + /* All lists are cleared. 
They will be filled in when the + * individual transitions are duplicated and attached. */ + outList(), + inTrans(), + inCond(), + + /* Set this using the original state's eofTarget. It will get mapped back + * to the new machine in the Fsm copy constructor. */ + eofTarget(other.eofTarget), + + /* Duplicate the entry id set and epsilon transitions. These + * are sets of integers and as such need no fixing. */ + entryIds(other.entryIds), + epsilonTrans(other.epsilonTrans), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* This is only used during merging. Normally null. */ + stateDictEl(0), + stateDictIn(0), + + nfaOut(0), + nfaIn(0), + + eptVect(0), + + /* Fsm state data. */ + stateBits(other.stateBits), + + /* Copy in priority data. */ + outPriorTable(other.outPriorTable), + + /* Copy in action data. */ + toStateActionTable(other.toStateActionTable), + fromStateActionTable(other.fromStateActionTable), + outActionTable(other.outActionTable), + outCondSpace(other.outCondSpace), + outCondKeys(other.outCondKeys), + errActionTable(other.errActionTable), + eofActionTable(other.eofActionTable), + + guardedInTable(other.guardedInTable), + lmNfaParts(other.lmNfaParts) +{ + /* Duplicate all the transitions. */ + for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + /* Duplicate and store the orginal target in the transition. This will + * be corrected once all the states have been created. */ + TransDataAp *newTrans = new TransDataAp( *trans->tdap() ); + assert( trans->tdap()->lmActionTable.length() == 0 ); + newTrans->toState = trans->tdap()->toState; + outList.append( newTrans ); + } + else { + /* Duplicate and store the orginal target in the transition. This will + * be corrected once all the states have been created. 
*/ + TransAp *newTrans = new TransCondAp( *trans->tcap() ); + + for ( CondList::Iter cti = trans->tcap()->condList; cti.lte(); cti++ ) { + CondAp *newCondTrans = new CondAp( *cti, newTrans ); + newCondTrans->key = cti->key; + + newTrans->tcap()->condList.append( newCondTrans ); + + assert( cti->lmActionTable.length() == 0 ); + + newCondTrans->toState = cti->toState; + } + + outList.append( newTrans ); + } + } + + /* Dup the nfa trans. */ + if ( other.nfaOut != 0 ) { + nfaOut = new NfaTransList; + for ( NfaTransList::Iter trans = *other.nfaOut; trans.lte(); trans++ ) { + NfaTrans *newtrans = new NfaTrans( *trans ); + newtrans->toState = trans->toState; + + nfaOut->append( newtrans ); + } + } +} + +/* If there is a state dict element, then delete it. Everything else is left + * up to the FsmGraph destructor. */ +StateAp::~StateAp() +{ + if ( stateDictEl != 0 ) + delete stateDictEl; + + if ( stateDictIn != 0 ) + delete stateDictIn; + + if ( nfaIn != 0 ) + delete nfaIn; + + if ( nfaOut != 0 ) { + nfaOut->empty(); + delete nfaOut; + } +} + +#ifdef TO_UPGRADE_CONDS +/* Compare two states using pointers to the states. With the approximate + * compare, the idea is that if the compare finds them the same, they can + * immediately be merged. */ +int ApproxCompare::compare( const StateAp *state1, const StateAp *state2 ) +{ + int compareRes; + + /* Test final state status. */ + if ( (state1->stateBits & STB_ISFINAL) && !(state2->stateBits & STB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & STB_ISFINAL) && (state2->stateBits & STB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. */ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmAp::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to get the transition pairs. 
*/ + RangePairIter<TransAp> outPair( ctx, state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangePairIter<TransAp>::RangeInS1: + compareRes = FsmAp::compareFullPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIter<TransAp>::RangeInS2: + compareRes = FsmAp::compareFullPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIter<TransAp>::RangeOverlap: + compareRes = FsmAp::compareFullPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIter<TransAp>::BreakS1: + case RangePairIter<TransAp>::BreakS2: + break; + } + } + + /* Check EOF targets. */ + if ( state1->eofTarget < state2->eofTarget ) + return -1; + else if ( state1->eofTarget > state2->eofTarget ) + return 1; + + if ( state1->guardedIn || !state2->guardedIn ) + return -1; + else if ( !state1->guardedIn || state2->guardedIn ) + return 1; + + /* Got through the entire state comparison, deem them equal. */ + return 0; +} +#endif + + +/* Compare class used in the initial partition. */ +int InitPartitionCompare::compare( const StateAp *state1, const StateAp *state2 ) +{ + int compareRes; + + if ( state1->nfaOut == 0 && state2->nfaOut != 0 ) + return -1; + else if ( state1->nfaOut != 0 && state2->nfaOut == 0 ) + return 1; + else if ( state1->nfaOut != 0 ) { + compareRes = CmpNfaTransList::compare( + *state1->nfaOut, *state2->nfaOut ); + if ( compareRes != 0 ) + return compareRes; + } + + /* Test final state status. */ + if ( (state1->stateBits & STB_ISFINAL) && !(state2->stateBits & STB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & STB_ISFINAL) && (state2->stateBits & STB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. 
*/ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmAp::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to test the transition pairs. */ + typedef RangePairIter< PiList<TransAp> > RangePairIterPiListTransAp; + RangePairIterPiListTransAp + outPair( ctx, state1->outList, state2->outList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangePairIterPiListTransAp::RangeInS1: + compareRes = FsmAp::compareTransDataPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIterPiListTransAp::RangeInS2: + compareRes = FsmAp::compareTransDataPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIterPiListTransAp::RangeOverlap: + compareRes = FsmAp::compareTransDataPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIterPiListTransAp::BreakS1: + case RangePairIterPiListTransAp::BreakS2: + break; + } + } + + return 0; +} + +/* Compare class for the sort that does the partitioning. */ +int PartitionCompare::compare( const StateAp *state1, const StateAp *state2 ) +{ + int compareRes; + + /* Use a pair iterator to get the transition pairs. 
*/ + typedef RangePairIter< PiList<TransAp> > RangePairIterPiListTransAp; + RangePairIterPiListTransAp outPair( ctx, state1->outList, state2->outList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangePairIterPiListTransAp::RangeInS1: + compareRes = FsmAp::compareTransPartPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIterPiListTransAp::RangeInS2: + compareRes = FsmAp::compareTransPartPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIterPiListTransAp::RangeOverlap: + compareRes = FsmAp::compareTransPartPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangePairIterPiListTransAp::BreakS1: + case RangePairIterPiListTransAp::BreakS2: + break; + } + } + + /* Test eof targets. */ + if ( state1->eofTarget == 0 && state2->eofTarget != 0 ) + return -1; + else if ( state1->eofTarget != 0 && state2->eofTarget == 0 ) + return 1; + else if ( state1->eofTarget != 0 ) { + /* Both eof targets are set. */ + compareRes = CmpOrd< MinPartition* >::compare( + state1->eofTarget->alg.partition, state2->eofTarget->alg.partition ); + if ( compareRes != 0 ) + return compareRes; + } + + return 0; +} + +#ifdef TO_UPGRADE_CONDS +/* Compare class for the sort that does the partitioning. */ +bool MarkCompare::shouldMark( MarkIndex &markIndex, const StateAp *state1, + const StateAp *state2 ) +{ + /* Use a pair iterator to get the transition pairs. 
*/ + RangePairIter<TransAp> outPair( ctx, state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangePairIter<TransAp>::RangeInS1: + if ( FsmAp::shouldMarkPtr( markIndex, outPair.s1Tel.trans, 0 ) ) + return true; + break; + + case RangePairIter<TransAp>::RangeInS2: + if ( FsmAp::shouldMarkPtr( markIndex, 0, outPair.s2Tel.trans ) ) + return true; + break; + + case RangePairIter<TransAp>::RangeOverlap: + if ( FsmAp::shouldMarkPtr( markIndex, + outPair.s1Tel.trans, outPair.s2Tel.trans ) ) + return true; + break; + + case RangePairIter<TransAp>::BreakS1: + case RangePairIter<TransAp>::BreakS2: + break; + } + } + + return false; +} +#endif + +/* + * Transition Comparison. + */ + +int FsmAp::comparePart( TransAp *trans1, TransAp *trans2 ) +{ + if ( trans1->plain() ) { + int compareRes = FsmAp::compareCondPartPtr( trans1->tdap(), trans2->tdap() ); + if ( compareRes != 0 ) + return compareRes; + } + else { + /* Use a pair iterator to get the transition pairs. */ + typedef ValPairIter< PiList<CondAp> > ValPairIterPiListCondAp; + ValPairIterPiListCondAp outPair( trans1->tcap()->condList, + trans2->tcap()->condList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case ValPairIterPiListCondAp::RangeInS1: { + int compareRes = FsmAp::compareCondPartPtr<CondAp>( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + } + + case ValPairIterPiListCondAp::RangeInS2: { + int compareRes = FsmAp::compareCondPartPtr<CondAp>( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + } + + case ValPairIterPiListCondAp::RangeOverlap: { + int compareRes = FsmAp::compareCondPartPtr<CondAp>( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + }} + } + } + + return 0; +} + +/* Compare target partitions. Either pointer may be null. 
*/ +int FsmAp::compareTransPartPtr( TransAp *trans1, TransAp *trans2 ) +{ + if ( trans1 != 0 ) { + /* If trans1 is set then so should trans2. The initial partitioning + * guarantees this for us. */ + return comparePart( trans1, trans2 ); + } + + return 0; +} + +template< class Trans > int FsmAp::compareCondPartPtr( Trans *trans1, Trans *trans2 ) +{ + if ( trans1 != 0 ) { + /* If trans1 is set then so should trans2. The initial partitioning + * guarantees this for us. */ + if ( trans1->toState == 0 && trans2->toState != 0 ) + return -1; + else if ( trans1->toState != 0 && trans2->toState == 0 ) + return 1; + else if ( trans1->toState != 0 ) { + /* Both of targets are set. */ + return CmpOrd< MinPartition* >::compare( + trans1->toState->alg.partition, trans2->toState->alg.partition ); + } + } + return 0; +} + + +/* Compares two transition pointers according to priority and functions. + * Either pointer may be null. Does not consider to state or from state. */ +int FsmAp::compareTransDataPtr( TransAp *trans1, TransAp *trans2 ) +{ + if ( trans1 == 0 && trans2 != 0 ) + return -1; + else if ( trans1 != 0 && trans2 == 0 ) + return 1; + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + return 0; +} + +#ifdef TO_UPGRADE_CONDS +/* Compares two transitions according to target state, priority and functions. + * Does not consider from state. Either of the pointers may be null. */ +int FsmAp::compareFullPtr( TransAp *trans1, TransAp *trans2 ) +{ + /* << "FIXME: " << __PRETTY_FUNCTION__ << std::endl; */ + + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. */ + if ( trans1 != 0 ) + return -1; + else + return 1; + } + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. Test target state, + * priority and funcs. 
*/ + if ( tai(trans1)->tcap()->condList.head->toState < tai(trans2)->tcap()->condList.head->toState ) + return -1; + else if ( tai(trans1)->tcap()->condList.head->toState > tai(trans2)->tcap()->condList.head->toState ) + return 1; + else if ( tai(trans1)->tcap()->condList.head->toState != 0 ) { + /* Test transition data. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + } + return 0; +} +#endif + +#ifdef TO_UPGRADE_CONDS +bool FsmAp::shouldMarkPtr( MarkIndex &markIndex, TransAp *trans1, + TransAp *trans2 ) +{ + /* << "FIXME: " << __PRETTY_FUNCTION__ << std::endl; */ + + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. The initial mark round + * should rule out this case. */ + assert( false ); + } + else if ( trans1 != 0 ) { + /* Both of the transitions are set. If the target pair is marked, then + * the pair we are considering gets marked. */ + return markIndex.isPairMarked( tai(trans1)->tcap()->condList.head->toState->alg.stateNum, + tai(trans2)->tcap()->condList.head->toState->alg.stateNum ); + } + + /* Neither of the transitiosn are set. */ + return false; +} +#endif diff --git a/libfsm/gendata.cc b/libfsm/gendata.cc new file mode 100644 index 00000000..c44f7049 --- /dev/null +++ b/libfsm/gendata.cc @@ -0,0 +1,1733 @@ +/* + * Copyright 2005-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "gendata.h" +#include "ragel.h" +#include "parsedata.h" +#include "fsmgraph.h" +#include "inputdata.h" +#include "version.h" + +#include <string.h> +#include <iostream> + +string itoa( int i ) +{ + char buf[16]; + sprintf( buf, "%i", i ); + return buf; +} + +void openHostBlock( char opener, InputData *id, ostream &out, const char *fileName, int line ) +{ + out << "host( \""; + for ( const char *pc = fileName; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } + out << "\", " << line << " ) " << opener << "{"; +} + +void Reducer::appendTrans( TransListVect &outList, Key lowKey, + Key highKey, TransAp *trans ) +{ + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 || trans->tdap()->actionTable.length() > 0 ) + outList.append( TransEl( lowKey, highKey, trans ) ); + } + else { + /* Add once if any cond has a to-state or an action table. */ + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 || cond->actionTable.length() > 0 ) { + outList.append( TransEl( lowKey, highKey, trans ) ); + break; + } + } + } +} + +void Reducer::reduceActionTables() +{ + /* Reduce the actions tables to a set. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + RedActionTable *actionTable = 0; + + /* Reduce To State Actions. 
*/ + if ( st->toStateActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Reduce From State Actions. */ + if ( st->fromStateActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Reduce EOF actions. */ + if ( st->eofActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->eofActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Loop the transitions and reduce their actions. */ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->actionTable.length() > 0 ) { + if ( actionTableMap.insert( trans->tdap()->actionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->actionTable.length() > 0 ) { + if ( actionTableMap.insert( cond->actionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + } + } + } + + if ( st->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *st->nfaOut; n.lte(); n++ ) { + if ( actionTableMap.insert( n->pushTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + + if ( actionTableMap.insert( n->restoreTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + + if ( actionTableMap.insert( n->popAction, &actionTable ) ) + actionTable->id = nextActionTableId++; + + if ( actionTableMap.insert( n->popTest, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + } + } +} + + +void Reducer::makeText( GenInlineList *outList, InlineItem *item ) +{ + GenInlineItem *inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Text ); + inlineItem->data = item->data; + + outList->append( inlineItem ); +} + +void Reducer::makeTargetItem( GenInlineList *outList, NameInst *nameTarg, + GenInlineItem::Type type ) +{ + 
long targetState; + if ( fsmCtx->generatingSectionSubset ) + targetState = -1; + else { + EntryMapEl *targ = fsm->entryPoints.find( nameTarg->id ); + targetState = targ->value->alg.stateNum; + } + + /* Make the item. */ + GenInlineItem *inlineItem = new GenInlineItem( InputLoc(), type ); + inlineItem->targId = targetState; + outList->append( inlineItem ); +} + + +void Reducer::makeSubList( GenInlineList *outList, const InputLoc &loc, + InlineList *inlineList, GenInlineItem::Type type ) +{ + /* Fill the sub list. */ + GenInlineList *subList = new GenInlineList; + makeGenInlineList( subList, inlineList ); + + /* Make the item. */ + GenInlineItem *inlineItem = new GenInlineItem( loc, type ); + inlineItem->children = subList; + outList->append( inlineItem ); +} + +/* Make a sublist item with a given type. */ +void Reducer::makeSubList( GenInlineList *outList, + InlineList *inlineList, GenInlineItem::Type type ) +{ + makeSubList( outList, InputLoc(), inlineList, type ); +} + +void Reducer::makeLmOnLast( GenInlineList *outList, InlineItem *item ) +{ + makeSetTokend( outList, 1 ); + + if ( item->longestMatchPart->action != 0 ) { + Action *action = item->longestMatchPart->action; + makeSubList( outList, action->loc, action->inlineList, + GenInlineItem::HostStmt ); + } +} + +void Reducer::makeLmOnNext( GenInlineList *outList, InlineItem *item ) +{ + makeSetTokend( outList, 0 ); + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::LmHold ) ); + + if ( item->longestMatchPart->action != 0 ) { + Action *action = item->longestMatchPart->action; + makeSubList( outList, action->loc, action->inlineList, + GenInlineItem::HostStmt ); + } +} + +void Reducer::makeExecGetTokend( GenInlineList *outList ) +{ + /* Make the Exec item. 
*/ + GenInlineItem *execItem = new GenInlineItem( InputLoc(), GenInlineItem::LmExec ); + execItem->children = new GenInlineList; + + /* Make the GetTokEnd */ + GenInlineItem *getTokend = new GenInlineItem( InputLoc(), GenInlineItem::LmGetTokEnd ); + execItem->children->append( getTokend ); + + outList->append( execItem ); +} + +void Reducer::makeLmOnLagBehind( GenInlineList *outList, InlineItem *item ) +{ + /* Jump to the tokend. */ + makeExecGetTokend( outList ); + + if ( item->longestMatchPart->action != 0 ) { + Action *action = item->longestMatchPart->action; + makeSubList( outList, action->loc, action->inlineList, + GenInlineItem::HostStmt ); + } +} + +void Reducer::makeLmSwitch( GenInlineList *outList, InlineItem *item ) +{ + GenInlineItem *lmSwitch = new GenInlineItem( InputLoc(), GenInlineItem::LmSwitch ); + GenInlineList *lmList = lmSwitch->children = new GenInlineList; + LongestMatch *longestMatch = item->longestMatch; + + /* We can't put the <exec> here because we may need to handle the error + * case and in that case p should not be changed. Instead use a default + * label in the switch to adjust p when user actions are not set. An id of + * -1 indicates the default. */ + + if ( longestMatch->lmSwitchHandlesError ) { + /* If the switch handles error then we should have also forced the + * error state. */ + assert( fsm->errState != 0 ); + + GenInlineItem *errCase = new GenInlineItem( InputLoc(), GenInlineItem::HostStmt ); + errCase->lmId = 0; + errCase->children = new GenInlineList; + + GenInlineItem *host = new GenInlineItem( item->loc, GenInlineItem::HostStmt ); + host->children = new GenInlineList; + errCase->children->append( host ); + + /* Make the item. This should probably be an LM goto, would eliminate + * need for wrapping in host statement. 
.*/ + GenInlineItem *gotoItem = new GenInlineItem( InputLoc(), GenInlineItem::Goto ); + gotoItem->targId = fsm->errState->alg.stateNum; + host->children->append( gotoItem ); + + lmList->append( errCase ); + } + + bool needDefault = false; + for ( LmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) { + if ( lmi->inLmSelect ) { + if ( lmi->action == 0 ) + needDefault = true; + else { + /* Open the action. Write it with the context that sets up _p + * when doing control flow changes from inside the machine. */ + GenInlineItem *lmCase = new GenInlineItem( InputLoc(), GenInlineItem::LmCase ); + lmCase->lmId = lmi->longestMatchId; + lmCase->children = new GenInlineList; + + makeExecGetTokend( lmCase->children ); + + GenInlineItem *subHost = new GenInlineItem( lmi->action->loc, + GenInlineItem::HostStmt ); + subHost->children = new GenInlineList; + makeGenInlineList( subHost->children, lmi->action->inlineList ); + lmCase->children->append( subHost ); + + lmList->append( lmCase ); + } + } + } + + if ( needDefault ) { + GenInlineItem *defCase = new GenInlineItem( item->loc, GenInlineItem::HostStmt ); + defCase->lmId = -1; + defCase->children = new GenInlineList; + + makeExecGetTokend( defCase->children ); + + lmList->append( defCase ); + } + + outList->append( lmSwitch ); +} + +void Reducer::makeLmNfaOnNext( GenInlineList *outList, InlineItem *item ) +{ + makeSetTokend( outList, 0 ); + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::LmHold ) ); + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::NfaClear ) ); + + if ( item->longestMatchPart->action != 0 ) { + Action *action = item->longestMatchPart->action; + makeSubList( outList, action->loc, action->inlineList, + GenInlineItem::HostStmt ); + } +} + +void Reducer::makeLmNfaOnEof( GenInlineList *outList, InlineItem *item ) +{ + makeSetTokend( outList, 0 ); + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::NfaClear ) ); + + if ( item->longestMatchPart->action 
!= 0 ) { + Action *action = item->longestMatchPart->action; + makeSubList( outList, action->loc, action->inlineList, + GenInlineItem::HostStmt ); + } +} + +void Reducer::makeLmNfaOnLast( GenInlineList *outList, InlineItem *item ) +{ + makeSetTokend( outList, 1 ); + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::NfaClear ) ); + + if ( item->longestMatchPart->action != 0 ) { + Action *action = item->longestMatchPart->action; + makeSubList( outList, action->loc, action->inlineList, + GenInlineItem::HostStmt ); + } +} + + +void Reducer::makeSetTokend( GenInlineList *outList, long offset ) +{ + GenInlineItem *inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmSetTokEnd ); + inlineItem->offset = offset; + outList->append( inlineItem ); +} + +void Reducer::makeSetAct( GenInlineList *outList, long lmId ) +{ + GenInlineItem *inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmSetActId ); + inlineItem->lmId = lmId; + outList->append( inlineItem ); +} + +void Reducer::makeGenInlineList( GenInlineList *outList, InlineList *inList ) +{ + for ( InlineList::Iter item = *inList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Text: + makeText( outList, item ); + break; + case InlineItem::Goto: + makeTargetItem( outList, item->nameTarg, GenInlineItem::Goto ); + break; + case InlineItem::GotoExpr: + makeSubList( outList, item->children, GenInlineItem::GotoExpr ); + break; + case InlineItem::Call: + makeTargetItem( outList, item->nameTarg, GenInlineItem::Call ); + break; + case InlineItem::CallExpr: + makeSubList( outList, item->children, GenInlineItem::CallExpr ); + break; + case InlineItem::Ncall: + makeTargetItem( outList, item->nameTarg, GenInlineItem::Ncall ); + break; + case InlineItem::NcallExpr: + makeSubList( outList, item->children, GenInlineItem::NcallExpr ); + break; + case InlineItem::Next: + makeTargetItem( outList, item->nameTarg, GenInlineItem::Next ); + break; + case InlineItem::NextExpr: + makeSubList( outList, 
item->children, GenInlineItem::NextExpr ); + break; + case InlineItem::Break: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Break ) ); + break; + case InlineItem::Nbreak: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Nbreak ) ); + break; + case InlineItem::Ret: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Ret ) ); + break; + case InlineItem::Nret: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Nret ) ); + break; + case InlineItem::PChar: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::PChar ) ); + break; + case InlineItem::Char: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Char ) ); + break; + case InlineItem::Curs: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Curs ) ); + break; + case InlineItem::Targs: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Targs ) ); + break; + case InlineItem::Entry: + makeTargetItem( outList, item->nameTarg, GenInlineItem::Entry ); + break; + + case InlineItem::Hold: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::Hold ) ); + break; + case InlineItem::Exec: + makeSubList( outList, item->children, GenInlineItem::Exec ); + break; + + case InlineItem::LmSetActId: + makeSetAct( outList, item->longestMatchPart->longestMatchId ); + break; + case InlineItem::LmSetTokEnd: + makeSetTokend( outList, 1 ); + break; + + case InlineItem::LmOnLast: + makeLmOnLast( outList, item ); + break; + case InlineItem::LmOnNext: + makeLmOnNext( outList, item ); + break; + case InlineItem::LmOnLagBehind: + makeLmOnLagBehind( outList, item ); + break; + case InlineItem::LmSwitch: + makeLmSwitch( outList, item ); + break; + + case InlineItem::LmNfaOnLast: + makeLmNfaOnLast( outList, item ); + break; + case InlineItem::LmNfaOnNext: + makeLmNfaOnNext( outList, item ); + break; + case InlineItem::LmNfaOnEof: + makeLmNfaOnEof( outList, item ); + break; + + case InlineItem::LmInitAct: + outList->append( 
new GenInlineItem( InputLoc(), GenInlineItem::LmInitAct ) ); + break; + case InlineItem::LmInitTokStart: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::LmInitTokStart ) ); + break; + case InlineItem::LmSetTokStart: + outList->append( new GenInlineItem( InputLoc(), GenInlineItem::LmSetTokStart ) ); + hasLongestMatch = true; + break; + case InlineItem::Stmt: + makeSubList( outList, item->children, GenInlineItem::GenStmt ); + break; + case InlineItem::Subst: { + /* Find the subst action. */ + Action *subst = curInlineAction->argList->data[item->substPos]; + makeGenInlineList( outList, subst->inlineList ); + break; + } + case InlineItem::NfaWrapAction: { + GenAction *wrap = allActions + item->wrappedAction->actionId; + GenInlineItem *gii = new GenInlineItem( InputLoc(), + GenInlineItem::NfaWrapAction ); + gii->wrappedAction = wrap; + outList->append( gii ); + break; + } + case InlineItem::NfaWrapConds: { + GenCondSpace *condSpace = allCondSpaces + item->condSpace->condSpaceId; + + GenInlineItem *gii = new GenInlineItem( InputLoc(), + GenInlineItem::NfaWrapConds ); + gii->condSpace = condSpace; + gii->condKeySet = item->condKeySet; + outList->append( gii ); + break; + }} + } +} + +void Reducer::makeExports() +{ + for ( ExportList::Iter exp = fsmCtx->exportList; exp.lte(); exp++ ) + exportList.append( new Export( exp->name, exp->key ) ); +} + +void Reducer::makeAction( Action *action ) +{ + GenInlineList *genList = new GenInlineList; + + curInlineAction = action; + makeGenInlineList( genList, action->inlineList ); + curInlineAction = 0; + + newAction( curAction++, action->name, action->loc, genList ); +} + + +void Reducer::makeActionList() +{ + /* Determine which actions to write. */ + int nextActionId = 0; + for ( ActionList::Iter act = fsmCtx->actionList; act.lte(); act++ ) { + if ( act->numRefs() > 0 || act->numCondRefs > 0 ) + act->actionId = nextActionId++; + } + + /* Write the list. 
*/ + initActionList( nextActionId ); + curAction = 0; + + for ( ActionList::Iter act = fsmCtx->actionList; act.lte(); act++ ) { + if ( act->actionId >= 0 ) + makeAction( act ); + } +} + +void Reducer::makeActionTableList() +{ + /* Must first order the action tables based on their id. */ + int numTables = nextActionTableId; + RedActionTable **tables = new RedActionTable*[numTables]; + for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ ) + tables[at->id] = at; + + initActionTableList( numTables ); + curActionTable = 0; + + for ( int t = 0; t < numTables; t++ ) { + long length = tables[t]->key.length(); + + /* Collect the action table. */ + RedAction *redAct = allActionTables + curActionTable; + redAct->actListId = curActionTable; + redAct->key.setAsNew( length ); + + for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) { + redAct->key[atel.pos()].key = 0; + redAct->key[atel.pos()].value = allActions + + atel->value->actionId; + } + + /* Insert into the action table map. */ + redFsm->actionMap.insert( redAct ); + + curActionTable += 1; + } + + delete[] tables; +} + +void Reducer::makeConditions() +{ + if ( fsm->ctx->condData->condSpaceMap.length() > 0 ) { + /* Allocate condition space ids. */ + long nextCondSpaceId = 0; + for ( CondSpaceMap::Iter cs = fsm->ctx->condData->condSpaceMap; cs.lte(); cs++ ) + cs->condSpaceId = nextCondSpaceId++; + + /* Allocate the array of conditions and put them on the list. */ + long length = fsm->ctx->condData->condSpaceMap.length(); + allCondSpaces = new GenCondSpace[length]; + for ( long c = 0; c < length; c++ ) + condSpaceList.append( &allCondSpaces[c] ); + + long curCondSpace = 0; + for ( CondSpaceMap::Iter cs = fsm->ctx->condData->condSpaceMap; cs.lte(); cs++ ) { + /* Transfer the id. 
*/ + allCondSpaces[curCondSpace].condSpaceId = cs->condSpaceId; + + curCondSpace += 1; + } + } + + makeActionList(); + makeActionTableList(); + + if ( fsm->ctx->condData->condSpaceMap.length() > 0 ) { + long curCondSpace = 0; + for ( CondSpaceMap::Iter cs = fsm->ctx->condData->condSpaceMap; cs.lte(); cs++ ) { + for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) + condSpaceItem( curCondSpace, (*csi)->actionId ); + curCondSpace += 1; + } + } +} + +bool Reducer::makeNameInst( std::string &res, NameInst *nameInst ) +{ + bool written = false; + if ( nameInst->parent != 0 ) + written = makeNameInst( res, nameInst->parent ); + + if ( !nameInst->name.empty() ) { + if ( written ) + res += '_'; + res += nameInst->name; + written = true; + } + + return written; +} + +void Reducer::makeEntryPoints() +{ + /* List of entry points other than start state. */ + if ( fsm->entryPoints.length() > 0 || fsmCtx->lmRequiresErrorState ) { + if ( fsmCtx->lmRequiresErrorState ) + setForcedErrorState(); + + for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { + /* Get the name instantiation from nameIndex. 
*/ + NameInst *nameInst = fsmCtx->nameIndex[en->key]; + std::string name; + makeNameInst( name, nameInst ); + StateAp *state = en->value; + addEntryPoint( strdup(name.c_str()), state->alg.stateNum ); + } + } +} + +void Reducer::makeStateActions( StateAp *state ) +{ + RedActionTable *toStateActions = 0; + if ( state->toStateActionTable.length() > 0 ) + toStateActions = actionTableMap.find( state->toStateActionTable ); + + RedActionTable *fromStateActions = 0; + if ( state->fromStateActionTable.length() > 0 ) + fromStateActions = actionTableMap.find( state->fromStateActionTable ); + + if ( toStateActions != 0 || fromStateActions != 0 ) { + long to = -1; + if ( toStateActions != 0 ) + to = toStateActions->id; + + long from = -1; + if ( fromStateActions != 0 ) + from = fromStateActions->id; + + setStateActions( curState, to, from, -1 ); + } +} + +void Reducer::makeTrans( Key lowKey, Key highKey, TransAp *trans ) +{ + RedCondEl *outConds; + int numConds; + + assert( ( allStates + curState ) != redFsm->errState ); + + if ( trans->plain() ) { + long targ = -1; + long action = -1; + + /* First reduce the action. */ + RedActionTable *actionTable = 0; + if ( trans->tdap()->actionTable.length() > 0 ) + actionTable = actionTableMap.find( trans->tdap()->actionTable ); + + if ( trans->tdap()->toState != 0 ) + targ = trans->tdap()->toState->alg.stateNum; + + if ( actionTable != 0 ) + action = actionTable->id; + + /* Make the new transitions. */ + RedStateAp *targState = targ >= 0 ? (allStates + targ) : redFsm->getErrorState(); + RedAction *at = action >= 0 ? (allActionTables + action) : 0; + + RedTransAp *trans = redFsm->allocateTrans( targState, at ); + newTrans( allStates + curState, lowKey, highKey, trans ); + } + else { + numConds = trans->tcap()->condList.length(); + outConds = new RedCondEl[numConds]; + int pos = 0; + for ( CondList::Iter cti = trans->tcap()->condList; cti.lte(); cti++, pos++ ) { + long targ = -1; + long action = -1; + + /* First reduce the action. 
*/ + RedActionTable *actionTable = 0; + if ( cti->actionTable.length() > 0 ) + actionTable = actionTableMap.find( cti->actionTable ); + + if ( cti->toState != 0 ) + targ = cti->toState->alg.stateNum; + + if ( actionTable != 0 ) + action = actionTable->id; + + /* Make the new transitions. */ + RedStateAp *targState = targ >= 0 ? (allStates + targ) : redFsm->getErrorState(); + RedAction *at = action >= 0 ? (allActionTables + action) : 0; + RedCondAp *cond = redFsm->allocateCond( targState, at ); + + outConds[pos].key = cti->key; + outConds[pos].value = cond; + } + + GenCondSpace *condSpace = allCondSpaces + trans->condSpace->condSpaceId; + + /* If the cond list is not full then we need an error cond. */ + RedCondAp *errCond = 0; + if ( numConds < ( 1 << condSpace->condSet.length() ) ) + errCond = redFsm->getErrorCond(); + + RedTransAp *trans = redFsm->allocateTrans( + condSpace, outConds, numConds, errCond ); + + newTrans( allStates + curState, lowKey, highKey, trans ); + } +} + +void Reducer::makeEofTrans( StateAp *state ) +{ + /* EOF actions go out here only if the state has no eof target. If it has + * an eof target then an eof transition will be used instead. 
*/ + RedActionTable *eofActions = 0; + if ( state->eofActionTable.length() > 0 ) + eofActions = actionTableMap.find( state->eofActionTable ); + + /* Flag EOF activity if we have conditions, a target, or actions. */ + if ( state->outCondSpace != 0 || state->eofTarget != 0 || eofActions != 0 ) + redFsm->bAnyEofActivity = true; + + long targ = state->alg.stateNum; + long action = -1; + + if ( state->eofTarget != 0 ) + targ = state->eofTarget->alg.stateNum; + + if ( eofActions != 0 ) + action = eofActions->id; + + + if ( state->outCondSpace == 0 ) { + // std::cerr << "setEofTrans( " << + // state->alg.stateNum << ", " << targ << ", " << action << " );" << endl; + + setEofTrans( state->alg.stateNum, targ, action ); + } + else { + int numConds = state->outCondKeys.length(); + RedCondEl *outConds = new RedCondEl[numConds]; + for ( int pos = 0; pos < numConds; pos++ ) { + /* Make the new transitions. */ + RedStateAp *targState = targ >= 0 ? (allStates + targ) : redFsm->getErrorState(); + RedAction *at = action >= 0 ? (allActionTables + action) : 0; + RedCondAp *cond = redFsm->allocateCond( targState, at ); + + outConds[pos].key = state->outCondKeys[pos]; + outConds[pos].value = cond; + } + + GenCondSpace *condSpace = allCondSpaces + state->outCondSpace->condSpaceId; + + /* If the cond list is not full then we need an error cond. */ + RedCondAp *errCond = 0; + if ( numConds < ( 1 << condSpace->condSet.length() ) ) + errCond = redFsm->getErrorCond(); + + setEofTrans( state->alg.stateNum, condSpace, outConds, numConds, errCond ); + } +} + + +void Reducer::makeTransList( StateAp *state ) +{ + TransListVect outList; + + /* If there are no ranges the task is simple. */ + if ( state->outList.length() > 0 ) { + /* Loop each source range. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* Reduce the transition. If it reduced to anything then add it.
*/ + appendTrans( outList, trans->lowKey, trans->highKey, trans ); + } + } + + initTransList( curState, outList.length() ); + + for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ ) + makeTrans( tvi->lowKey, tvi->highKey, tvi->value ); + + finishTransList( curState ); +} + +void Reducer::makeStateList() +{ + /* Write the list of states. */ + long length = fsm->stateList.length(); + initStateList( length ); + curState = 0; + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + makeStateActions( st ); + makeEofTrans( st ); + makeTransList( st ); + + long id = st->alg.stateNum; + setId( curState, id ); + + if ( st->isFinState() ) + setFinal( curState ); + + if ( st->nfaOut != 0 ) { + RedStateAp *from = allStates + curState; + from->nfaTargs = new RedNfaTargs; + for ( NfaTransList::Iter targ = *st->nfaOut; targ.lte(); targ++ ) { + RedStateAp *rtarg = allStates + targ->toState->alg.stateNum; + + RedAction *pushRa = 0; + RedAction *popTestRa = 0; + + if ( targ->pushTable.length() > 0 ) { + RedActionTable *pushActions = + actionTableMap.find( targ->pushTable ); + pushRa = allActionTables + pushActions->id; + } + + if ( targ->popTest.length() > 0 ) { + RedActionTable *popActions = + actionTableMap.find( targ->popTest ); + popTestRa = allActionTables + popActions->id; + } + + + from->nfaTargs->append( RedNfaTarg( rtarg, pushRa, + popTestRa, targ->order ) ); + + MergeSort<RedNfaTarg, RedNfaTargCmp> sort; + sort.sort( from->nfaTargs->data, from->nfaTargs->length() ); + } + } + + curState += 1; + } +} + +void Reducer::makeMachine() +{ + createMachine(); + + /* Action tables. */ + reduceActionTables(); + + makeConditions(); + + /* Start State. */ + setStartState( fsm->startState->alg.stateNum ); + + /* Error state. */ + if ( fsm->errState != 0 ) + setErrorState( fsm->errState->alg.stateNum ); + + makeEntryPoints(); + makeStateList(); + + resolveTargetStates(); +} + +void Reducer::make( const HostLang *hostLang, const HostType *alphType ) +{ + /* Alphabet type. 
*/ + setAlphType( hostLang, alphType->internalName ); + + /* Getkey expression. */ + if ( fsmCtx->getKeyExpr != 0 ) { + getKeyExpr = new GenInlineList; + makeGenInlineList( getKeyExpr, fsmCtx->getKeyExpr ); + } + + /* Access expression. */ + if ( fsmCtx->accessExpr != 0 ) { + accessExpr = new GenInlineList; + makeGenInlineList( accessExpr, fsmCtx->accessExpr ); + } + + /* PrePush expression. */ + if ( fsmCtx->prePushExpr != 0 ) { + GenInlineList *il = new GenInlineList; + makeGenInlineList( il, fsmCtx->prePushExpr->inlineList ); + prePushExpr = new GenInlineExpr( fsmCtx->prePushExpr->loc, il ); + } + + /* PostPop expression. */ + if ( fsmCtx->postPopExpr != 0 ) { + GenInlineList *il = new GenInlineList; + makeGenInlineList( il, fsmCtx->postPopExpr->inlineList ); + postPopExpr = new GenInlineExpr( fsmCtx->postPopExpr->loc, il ); + } + + /* NFA PrePush expression. */ + if ( fsmCtx->nfaPrePushExpr != 0 ) { + GenInlineList *il = new GenInlineList; + makeGenInlineList( il, fsmCtx->nfaPrePushExpr->inlineList ); + nfaPrePushExpr = new GenInlineExpr( fsmCtx->nfaPrePushExpr->loc, il ); + } + + /* NFA PostPop expression. */ + if ( fsmCtx->nfaPostPopExpr != 0 ) { + GenInlineList *il = new GenInlineList; + makeGenInlineList( il, fsmCtx->nfaPostPopExpr->inlineList ); + nfaPostPopExpr = new GenInlineExpr( fsmCtx->nfaPostPopExpr->loc, il ); + } + + + /* + * Variable expressions.
+ */ + + if ( fsmCtx->pExpr != 0 ) { + pExpr = new GenInlineList; + makeGenInlineList( pExpr, fsmCtx->pExpr ); + } + + if ( fsmCtx->peExpr != 0 ) { + peExpr = new GenInlineList; + makeGenInlineList( peExpr, fsmCtx->peExpr ); + } + + if ( fsmCtx->eofExpr != 0 ) { + eofExpr = new GenInlineList; + makeGenInlineList( eofExpr, fsmCtx->eofExpr ); + } + + if ( fsmCtx->csExpr != 0 ) { + csExpr = new GenInlineList; + makeGenInlineList( csExpr, fsmCtx->csExpr ); + } + + if ( fsmCtx->topExpr != 0 ) { + topExpr = new GenInlineList; + makeGenInlineList( topExpr, fsmCtx->topExpr ); + } + + if ( fsmCtx->stackExpr != 0 ) { + stackExpr = new GenInlineList; + makeGenInlineList( stackExpr, fsmCtx->stackExpr ); + } + + if ( fsmCtx->actExpr != 0 ) { + actExpr = new GenInlineList; + makeGenInlineList( actExpr, fsmCtx->actExpr ); + } + + if ( fsmCtx->tokstartExpr != 0 ) { + tokstartExpr = new GenInlineList; + makeGenInlineList( tokstartExpr, fsmCtx->tokstartExpr ); + } + + if ( fsmCtx->tokendExpr != 0 ) { + tokendExpr = new GenInlineList; + makeGenInlineList( tokendExpr, fsmCtx->tokendExpr ); + } + + if ( fsmCtx->dataExpr != 0 ) { + dataExpr = new GenInlineList; + makeGenInlineList( dataExpr, fsmCtx->dataExpr ); + } + + makeExports(); + makeMachine(); + + /* Do this before distributing transitions out to singles and defaults + * makes life easier. */ + redFsm->maxKey = findMaxKey(); + + redFsm->assignActionLocs(); + + /* Find the first final state (The final state with the lowest id). 
*/ + redFsm->findFirstFinState(); +} + +void Reducer::createMachine() +{ + redFsm = new RedFsmAp( fsm->ctx, machineId ); +} + +void Reducer::initActionList( unsigned long length ) +{ + allActions = new GenAction[length]; + for ( unsigned long a = 0; a < length; a++ ) + actionList.append( allActions+a ); +} + +void Reducer::newAction( int anum, std::string name, + const InputLoc &loc, GenInlineList *inlineList ) +{ + allActions[anum].actionId = anum; + allActions[anum].name = name; + allActions[anum].loc = loc; + allActions[anum].inlineList = inlineList; +} + +void Reducer::initActionTableList( unsigned long length ) +{ + allActionTables = new RedAction[length]; +} + +void Reducer::initStateList( unsigned long length ) +{ + redFsm->allStates = allStates = new RedStateAp[length]; + for ( unsigned long s = 0; s < length; s++ ) + redFsm->stateList.append( allStates+s ); + + /* We get the start state as an offset, set the pointer now. */ + if ( startState >= 0 ) + redFsm->startState = allStates + startState; + if ( errState >= 0 ) + redFsm->errState = allStates + errState; + for ( EntryIdVect::Iter en = entryPointIds; en.lte(); en++ ) + redFsm->entryPoints.insert( allStates + *en ); + + /* The nextStateId is no longer used to assign state ids (they come in set + * from the frontend now), however generation code still depends on it. + * Should eventually remove this variable. */ + redFsm->nextStateId = redFsm->stateList.length(); +} + +void Reducer::setStartState( unsigned long _startState ) +{ + startState = _startState; +} + +void Reducer::setErrorState( unsigned long _errState ) +{ + errState = _errState; +} + +void Reducer::addEntryPoint( char *name, unsigned long entryState ) +{ + entryPointIds.append( entryState ); + entryPointNames.append( name ); +} + +void Reducer::initTransList( int snum, unsigned long length ) +{ + /* Could preallocate the out range to save time growing it. For now do + * nothing. 
*/ +} + +void Reducer::newTrans( RedStateAp *state, Key lowKey, Key highKey, RedTransAp *trans ) +{ + /* Get the out range list of the given state. */ + RedTransList &destRange = state->outRange; + + /* Reduced machines are complete. We need to fill any gaps with the error + * transitions. */ + if ( destRange.length() == 0 ) { + /* Range is currently empty. */ + if ( keyOps->lt( keyOps->minKey, lowKey ) ) { + /* The first range doesn't start at the low end. */ + Key fillHighKey = lowKey; + keyOps->decrement( fillHighKey ); + + /* Create the filler with the state's error transition. */ + RedTransEl newTel( fsm->ctx->keyOps->minKey, fillHighKey, + redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + else { + /* The range list is not empty, get the last range. */ + RedTransEl *last = &destRange[destRange.length()-1]; + Key nextKey = last->highKey; + keyOps->increment( nextKey ); + if ( keyOps->lt( nextKey, lowKey ) ) { + /* There is a gap to fill. Make the high key. */ + Key fillHighKey = lowKey; + keyOps->decrement( fillHighKey ); + + /* Create the filler with the state's error transition. */ + RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + + /* Filler taken care of. Append the range. */ + destRange.append( RedTransEl( lowKey, highKey, trans ) ); +} + +void Reducer::finishTransList( int snum ) +{ + /* Get the current state and range. */ + RedStateAp *curState = allStates + snum; + RedTransList &destRange = curState->outRange; + + if ( curState == redFsm->errState ) + return; + + /* We may need filler on the end. */ + /* Check if there are any ranges already. */ + if ( destRange.length() == 0 ) { + /* Fill with the whole alphabet. */ + /* Add the range on the lower and upper bound. */ + RedTransEl newTel( fsm->ctx->keyOps->minKey, + fsm->ctx->keyOps->maxKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + else { + /* Get the last and check for a gap on the end.
*/ + RedTransEl *last = &destRange[destRange.length()-1]; + if ( keyOps->lt( last->highKey, fsm->ctx->keyOps->maxKey ) ) { + /* Make the high key. */ + Key fillLowKey = last->highKey; + keyOps->increment( fillLowKey ); + + /* Create the new range with the error trans and append it. */ + RedTransEl newTel( fillLowKey, fsm->ctx->keyOps->maxKey, + redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } +} + +void Reducer::setId( int snum, int id ) +{ + RedStateAp *curState = allStates + snum; + curState->id = id; +} + +void Reducer::setFinal( int snum ) +{ + RedStateAp *curState = allStates + snum; + curState->isFinal = true; +} + + +void Reducer::setStateActions( int snum, long toStateAction, + long fromStateAction, long eofAction ) +{ + RedStateAp *curState = allStates + snum; + if ( toStateAction >= 0 ) + curState->toStateAction = allActionTables + toStateAction; + if ( fromStateAction >= 0 ) + curState->fromStateAction = allActionTables + fromStateAction; + if ( eofAction >= 0 ) + curState->eofAction = allActionTables + eofAction; +} + +void Reducer::setEofTrans( int snum, long eofTarget, long actId ) +{ + RedStateAp *curState = allStates + snum; + RedStateAp *targState = allStates + eofTarget; + RedAction *eofAct = actId >= 0 ? 
allActionTables + actId : 0; + + RedTransAp *trans = redFsm->allocateTrans( targState, eofAct ); + curState->eofTrans = trans; +} + +void Reducer::setEofTrans( int snum, GenCondSpace *condSpace, + RedCondEl *outConds, int numConds, RedCondAp *errCond ) +{ + RedStateAp *curState = allStates + snum; + + RedTransAp *trans = redFsm->allocateTrans( condSpace, outConds, numConds, errCond ); + + curState->eofTrans = trans; +} + +void Reducer::resolveTargetStates( GenInlineList *inlineList ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case GenInlineItem::Goto: case GenInlineItem::Call: + case GenInlineItem::Ncall: case GenInlineItem::Next: + case GenInlineItem::Entry: + item->targState = allStates + item->targId; + break; + default: + break; + } + + if ( item->children != 0 ) + resolveTargetStates( item->children ); + } +} + +void Reducer::resolveTargetStates() +{ + for ( GenActionList::Iter a = actionList; a.lte(); a++ ) + resolveTargetStates( a->inlineList ); + +#if 0 + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofAction != 0 ) { + for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) + setLabelsNeeded( item->value->inlineList ); + } + + if ( st->eofTrans != 0 ) { + long condsFullSize = st->eofTrans->condFullSize(); + for ( int c = 0; c < condsFullSize; c++ ) { + RedCondPair *pair = st->eofTrans->outCond( c ); + setLabelsNeeded( pair ); + } + } +#endif +} + +bool Reducer::setAlphType( const HostLang *hostLang, const char *data ) +{ + HostType *alphType = findAlphTypeInternal( hostLang, data ); + if ( alphType == 0 ) + return false; + + return true; +} + +void Reducer::condSpaceItem( int cnum, long condActionId ) +{ + GenCondSpace *cond = allCondSpaces + cnum; + cond->condSet.append( allActions + condActionId ); +} + +void Reducer::initStateCondList( int snum, ulong length ) +{ + /* Could preallocate these, as we could with transitions. 
*/ +} + +void Reducer::addStateCond( int snum, Key lowKey, Key highKey, long condNum ) +{ +} + +Key Reducer::findMaxKey() +{ + Key maxKey = fsm->ctx->keyOps->maxKey; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + assert( st->outSingle.length() == 0 ); + assert( st->defTrans == 0 ); + + long rangeLen = st->outRange.length(); + if ( rangeLen > 0 ) { + Key highKey = st->outRange[rangeLen-1].highKey; + if ( keyOps->gt( highKey, maxKey ) ) + maxKey = highKey; + } + } + return maxKey; +} + +void Reducer::actionActionRefs( RedAction *action ) +{ + action->numTransRefs += 1; + for ( GenActionTable::Iter item = action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; +} + +void Reducer::transActionRefs( RedTransAp *trans ) +{ + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + if ( cond->action != 0 ) + actionActionRefs( cond->action ); + } + + if ( trans->condSpace != 0 ) + trans->condSpace->numTransRefs += 1; +} + +void Reducer::transListActionRefs( RedTransList &list ) +{ + for ( RedTransList::Iter rtel = list; rtel.lte(); rtel++ ) + transActionRefs( rtel->value ); +} + +void Reducer::findFinalActionRefs() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Reference count out of single transitions. */ + transListActionRefs( st->outSingle ); + + /* Reference count out of range transitions. */ + transListActionRefs( st->outRange ); + + /* Reference count default transition. */ + if ( st->defTrans != 0 ) + transActionRefs( st->defTrans ); + + /* Reference count EOF transitions. */ + if ( st->eofTrans != 0 ) + transActionRefs( st->eofTrans ); + + /* Reference count to state actions. */ + if ( st->toStateAction != 0 ) { + st->toStateAction->numToStateRefs += 1; + for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) + item->value->numToStateRefs += 1; + } + + /* Reference count from state actions.
*/ + if ( st->fromStateAction != 0 ) { + st->fromStateAction->numFromStateRefs += 1; + for ( GenActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ ) + item->value->numFromStateRefs += 1; + } + + /* Reference count EOF actions. */ + if ( st->eofAction != 0 ) { + st->eofAction->numEofRefs += 1; + for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) + item->value->numEofRefs += 1; + } + + if ( st->nfaTargs != 0 ) { + for ( RedNfaTargs::Iter nt = *st->nfaTargs; nt.lte(); nt++ ) { + + if ( nt->push != 0 ) { + nt->push->numNfaPushRefs += 1; + for ( GenActionTable::Iter item = nt->push->key; item.lte(); item++ ) + item->value->numNfaPushRefs += 1; + } + + if ( nt->popTest != 0 ) { + nt->popTest->numNfaPopTestRefs += 1; + for ( GenActionTable::Iter item = nt->popTest->key; item.lte(); item++ ) + item->value->numNfaPopTestRefs += 1; + } + } + } + } +} + +void Reducer::analyzeAction( GenAction *act, GenInlineList *inlineList ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* Only consider actions that are referenced. */ + if ( act->numRefs() > 0 ) { + if ( item->type == GenInlineItem::Goto || item->type == GenInlineItem::GotoExpr ) + { + redFsm->bAnyActionGotos = true; + } + else if ( item->type == GenInlineItem::Call || item->type == GenInlineItem::CallExpr ) { + redFsm->bAnyActionCalls = true; + } + else if ( item->type == GenInlineItem::Ncall || item->type == GenInlineItem::NcallExpr ) { + redFsm->bAnyActionCalls = true; + } + else if ( item->type == GenInlineItem::Ret ) + redFsm->bAnyActionRets = true; + else if ( item->type == GenInlineItem::Nret ) + redFsm->bAnyActionNrets = true; + else if ( item->type == GenInlineItem::LmInitAct || + item->type == GenInlineItem::LmSetActId || + item->type == GenInlineItem::LmSwitch ) + { + redFsm->bUsingAct = true; + } + + /* Any by value control in all actions? 
*/ + if ( item->type == GenInlineItem::CallExpr || item->type == GenInlineItem::GotoExpr ) + redFsm->bAnyActionByValControl = true; + } + + /* Check for various things in regular actions. */ + if ( act->numTransRefs > 0 || act->numToStateRefs > 0 || act->numFromStateRefs > 0 ) { + /* Any returns in regular actions? */ + if ( item->type == GenInlineItem::Ret || item->type == GenInlineItem::Nret ) + redFsm->bAnyRegActionRets = true; + + /* Any next statements in the regular actions? */ + if ( item->type == GenInlineItem::Next || item->type == GenInlineItem::NextExpr || + item->type == GenInlineItem::Ncall || item->type == GenInlineItem::NcallExpr || + item->type == GenInlineItem::Nret ) + redFsm->bAnyRegNextStmt = true; + + /* Any by value control in regular actions? */ + if ( item->type == GenInlineItem::CallExpr || item->type == GenInlineItem::GotoExpr ) + redFsm->bAnyRegActionByValControl = true; + + /* Any references to the current state in regular actions? */ + if ( item->type == GenInlineItem::Curs ) + redFsm->bAnyRegCurStateRef = true; + + if ( item->type == GenInlineItem::Break ) + redFsm->bAnyRegBreak = true; + + if ( item->type == GenInlineItem::Nbreak ) + redFsm->bAnyRegNbreak = true; + } + + if ( item->children != 0 ) + analyzeAction( act, item->children ); + } +} + +void Reducer::analyzeActionList( RedAction *redAct, GenInlineList *inlineList ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* Any next statements in the action table? */ + if ( item->type == GenInlineItem::Next || item->type == GenInlineItem::NextExpr || + item->type == GenInlineItem::Ncall || item->type == GenInlineItem::NcallExpr || + item->type == GenInlineItem::Nret ) + redAct->bAnyNextStmt = true; + + /* Any references to the current state. 
*/ + if ( item->type == GenInlineItem::Curs ) + redAct->bAnyCurStateRef = true; + + if ( item->type == GenInlineItem::Break ) + redAct->bAnyBreakStmt = true; + + if ( item->type == GenInlineItem::NfaWrapConds ) + item->condSpace->numNfaRefs += 1; + + if ( item->children != 0 ) + analyzeActionList( redAct, item->children ); + } +} + +/* Assign ids to referenced actions. */ +void Reducer::assignActionIds() +{ + int nextActionId = 0; + for ( GenActionList::Iter act = actionList; act.lte(); act++ ) { + /* Only ever interested in referenced actions. */ + if ( act->numRefs() > 0 ) + act->actionId = nextActionId++; + } +} + +void Reducer::setValueLimits() +{ + redFsm->maxSingleLen = 0; + redFsm->maxRangeLen = 0; + redFsm->maxKeyOffset = 0; + redFsm->maxIndexOffset = 0; + redFsm->maxActListId = 0; + redFsm->maxActionLoc = 0; + redFsm->maxActArrItem = 0; + redFsm->maxSpan = 0; + redFsm->maxFlatIndexOffset = 0; + redFsm->maxCondSpaceId = 0; + + /* In both of these cases the 0 index is reserved for no value, so the max + * is one more than it would be if they started at 0. */ + redFsm->maxIndex = redFsm->transSet.length(); + redFsm->maxCond = condSpaceList.length(); + + /* The nextStateId - 1 is the last state id assigned. */ + redFsm->maxState = redFsm->nextStateId - 1; + + for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) { + if ( csi->condSpaceId > redFsm->maxCondSpaceId ) + redFsm->maxCondSpaceId = csi->condSpaceId; + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Maximum single length. */ + if ( st->outSingle.length() > redFsm->maxSingleLen ) + redFsm->maxSingleLen = st->outSingle.length(); + + /* Maximum range length. */ + if ( st->outRange.length() > redFsm->maxRangeLen ) + redFsm->maxRangeLen = st->outRange.length(); + + /* The key offset index offset for the state after last is not used, skip it.. */ + if ( ! 
st.last() ) { + redFsm->maxKeyOffset += st->outSingle.length() + st->outRange.length()*2; + redFsm->maxIndexOffset += st->outSingle.length() + st->outRange.length() + 2; + } + + /* Max key span. */ + if ( st->transList != 0 ) { + unsigned long long span = fsm->ctx->keyOps->span( st->lowKey, st->highKey ); + if ( span > redFsm->maxSpan ) + redFsm->maxSpan = span; + } + + /* Max flat index offset. */ + if ( ! st.last() ) { + if ( st->transList != 0 ) + redFsm->maxFlatIndexOffset += fsm->ctx->keyOps->span( st->lowKey, st->highKey ); + redFsm->maxFlatIndexOffset += 1; + } + } + + for ( GenActionTableMap::Iter at = redFsm->actionMap; at.lte(); at++ ) { + /* Maximum id of action lists. */ + if ( at->actListId+1 > redFsm->maxActListId ) + redFsm->maxActListId = at->actListId+1; + + /* Maximum location of items in action array. */ + if ( at->location+1 > redFsm->maxActionLoc ) + redFsm->maxActionLoc = at->location+1; + + /* Maximum values going into the action array. */ + if ( at->key.length() > redFsm->maxActArrItem ) + redFsm->maxActArrItem = at->key.length(); + for ( GenActionTable::Iter item = at->key; item.lte(); item++ ) { + if ( item->value->actionId > redFsm->maxActArrItem ) + redFsm->maxActArrItem = item->value->actionId; + } + } +} + +/* Gather various info on the machine. */ +void Reducer::analyzeMachine() +{ + /* Find the true count of action references. */ + findFinalActionRefs(); + + /* Check if there are any calls in action code. */ + for ( GenActionList::Iter act = actionList; act.lte(); act++ ) { + /* Record the occurrence of various kinds of actions. 
*/ + if ( act->numToStateRefs > 0 ) + redFsm->bAnyToStateActions = true; + if ( act->numFromStateRefs > 0 ) + redFsm->bAnyFromStateActions = true; + if ( act->numEofRefs > 0 ) + redFsm->bAnyEofActions = true; + if ( act->numTransRefs > 0 ) + redFsm->bAnyRegActions = true; + + if ( act->numNfaPushRefs > 0 ) { + redFsm->bAnyNfaPushPops = true; + redFsm->bAnyNfaPushes = true; + } + + if ( act->numNfaPopActionRefs > 0 ) { + redFsm->bAnyNfaPushPops = true; + redFsm->bAnyNfaPops = true; + } + + if ( act->numNfaPopTestRefs > 0 ) { + redFsm->bAnyNfaPushPops = true; + redFsm->bAnyNfaPops = true; + } + + /* Recurse through the action's parse tree looking for various things. */ + analyzeAction( act, act->inlineList ); + } + + /* Analyze reduced action lists. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + for ( GenActionTable::Iter act = redAct->key; act.lte(); act++ ) + if ( act->value->inlineList != 0 ) + analyzeActionList( redAct, act->value->inlineList ); + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) + redFsm->bAnyNfaStates = true; + } + + /* Find states that have transitions with actions that have next + * statements. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Check any actions out of outSinge. */ + for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + if ( cond->action != 0 && cond->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + } + + /* Check any actions out of outRange. 
*/ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + if ( cond->action != 0 && cond->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + } + + /* Check any action out of default. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + if ( cond->action != 0 && cond->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + } + + if ( st->eofTrans != 0 ) + redFsm->bAnyEofTrans = true; + } + + for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) { + GenCondSpace *condSpace = csi; + + if ( condSpace->numTransRefs > 0 ) + redFsm->bAnyTransCondRefs = true; + + if ( condSpace->numNfaRefs > 0 ) + redFsm->bAnyNfaCondRefs = true; + } + + /* Assign ids to actions that are referenced. */ + assignActionIds(); + + /* Set the maximums of various values used for deciding types. */ + setValueLimits(); +} + +void CodeGenData::genOutputLineDirective( std::ostream &out ) const +{ + std::streambuf *sbuf = out.rdbuf(); + output_filter *filter = dynamic_cast<output_filter*>(sbuf); + if ( filter != 0 ) + (*genLineDirective)( out, lineDirectives, filter->line + 1, filter->fileName ); +} + +void CodeGenData::write_option_error( InputLoc &loc, std::string arg ) +{ + red->id->warning(loc) << "unrecognized write option \"" << arg << "\"" << std::endl; +} + +void CodeGenData::writeClear() +{ + clear(); + + /* Delete all the nodes in the action list. Will cause all the + * string data that represents the actions to be deallocated. */ + red->fsm->ctx->actionList.empty(); + + delete red->fsm; + red->fsm = 0; + + // red->pd->graphDict.empty(); + + cleared = true; +} + +void CodeGenData::collectReferences() +{ + /* Do this once only. 
*/ + if ( !referencesCollected ) { + referencesCollected = true; + + /* Nullify the output and execute the write. We use this pass to collect references. */ + nullbuf nb; + std::streambuf *filt = out.rdbuf( &nb ); + writeExec(); + + /* Restore the output for whatever writing comes next. */ + out.rdbuf( filt ); + } +} + +void CodeGenData::writeStatement( InputLoc &loc, int nargs, + std::vector<std::string> &args, bool generateDot, const HostLang *hostLang ) +{ + /* Start write generation on a fresh line. */ + out << '\n'; + + if ( cleared ) { + red->id->error(loc) << "write statement following a clear is invalid" << std::endl; + return; + } + + genOutputLineDirective( out ); + + if ( args[0] == "data" ) { + for ( int i = 1; i < nargs; i++ ) { + if ( args[i] == "noerror" ) + noError = true; + else if ( args[i] == "noprefix" ) + noPrefix = true; + else if ( args[i] == "nofinal" ) + noFinal = true; + else + write_option_error( loc, args[i] ); + } + + if ( red->id->printStatistics ) { + red->id->stats() << "fsm-name\t" << fsmName << std::endl; + red->id->stats() << "fsm-states\t" << redFsm->stateList.length() << std::endl; + } + + collectReferences(); + writeData(); + statsSummary(); + } + else if ( args[0] == "init" ) { + for ( int i = 1; i < nargs; i++ ) { + if ( args[i] == "nocs" ) + noCS = true; + else + write_option_error( loc, args[i] ); + } + writeInit(); + } + else if ( args[0] == "exec" ) { + for ( int i = 1; i < nargs; i++ ) { + if ( args[i] == "noend" ) + noEnd = true; + else + write_option_error( loc, args[i] ); + } + collectReferences(); + writeExec(); + } + else if ( args[0] == "exports" ) { + for ( int i = 1; i < nargs; i++ ) + write_option_error( loc, args[i] ); + writeExports(); + } + else if ( args[0] == "start" ) { + for ( int i = 1; i < nargs; i++ ) + write_option_error( loc, args[i] ); + writeStart(); + } + else if ( args[0] == "first_final" ) { + for ( int i = 1; i < nargs; i++ ) + write_option_error( loc, args[i] ); + writeFirstFinal(); + } + else 
if ( args[0] == "error" ) { + for ( int i = 1; i < nargs; i++ ) + write_option_error( loc, args[i] ); + writeError(); + } + else if ( args[0] == "clear" ) { + for ( int i = 1; i < nargs; i++ ) + write_option_error( loc, args[i] ); + writeClear(); + } + else { + /* EMIT An error here. */ + red->id->error(loc) << "unrecognized write command \"" << + args[0] << "\"" << std::endl; + } +} diff --git a/libfsm/gendata.h b/libfsm/gendata.h new file mode 100644 index 00000000..f34f2629 --- /dev/null +++ b/libfsm/gendata.h @@ -0,0 +1,477 @@ +/* + * Copyright 2005-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _GENDATA_H +#define _GENDATA_H + +#include <iostream> +#include <string> +#include <vector> +#include "config.h" +#include "redfsm.h" +#include "common.h" +#include "fsmgraph.h" + +/* Forwards. 
*/ +struct TransAp; +struct FsmAp; +struct PdBase; +struct InputData; +struct FsmGbl; +struct GenInlineList; +struct InlineItem; + +struct RedActionTable +: + public AvlTreeEl<RedActionTable> +{ + RedActionTable( const ActionTable &key ) + : + key(key), + id(0) + { } + + const ActionTable &getKey() + { return key; } + + ActionTable key; + int id; +}; + +typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap; + +struct NextRedTrans +{ + Key lowKey, highKey; + TransAp *trans; + TransAp *next; + + void load() { + if ( trans != 0 ) { + next = trans->next; + lowKey = trans->lowKey; + highKey = trans->highKey; + } + } + + NextRedTrans( TransAp *t ) { + trans = t; + load(); + } + + void increment() { + trans = next; + load(); + } +}; + +struct RedBase +{ + RedBase( FsmGbl *id, FsmCtx *fsmCtx, FsmAp *fsm, std::string fsmName, int machineId ) + : + id(id), + fsmCtx(fsmCtx), + fsm(fsm), + fsmName(fsmName), + machineId(machineId), + keyOps(fsm->ctx->keyOps), + nextActionTableId(0) + { + } + + FsmGbl *id; + FsmCtx *fsmCtx; + FsmAp *fsm; + std::string fsmName; + int machineId; + + KeyOps *keyOps; + + ActionTableMap actionTableMap; + int nextActionTableId; +}; + +struct NameInst; +typedef DList<GenAction> GenActionList; + +typedef unsigned long ulong; + +void openHostBlock( char opener, InputData *id, std::ostream &out, const char *fileName, int line ); + +string itoa( int i ); + +struct Reducer + : public RedBase +{ + Reducer( FsmGbl *id, FsmCtx *fsmCtx, FsmAp *fsm, std::string fsmName, int machineId ) + : + RedBase( id, fsmCtx, fsm, fsmName, machineId ), + redFsm(0), + allActions(0), + allActionTables(0), + allConditions(0), + allCondSpaces(0), + allStates(0), + nameIndex(0), + startState(-1), + errState(-1), + getKeyExpr(0), + accessExpr(0), + prePushExpr(0), + postPopExpr(0), + nfaPrePushExpr(0), + nfaPostPopExpr(0), + pExpr(0), + peExpr(0), + eofExpr(0), + csExpr(0), + topExpr(0), + stackExpr(0), + actExpr(0), + tokstartExpr(0), + tokendExpr(0), + 
dataExpr(0), + hasLongestMatch(false) + { + } + + ~Reducer() + { + if ( redFsm != 0 ) + delete redFsm; + + delete[] allActions; + delete[] allActionTables; + delete[] allConditions; + delete[] allCondSpaces; + + actionTableMap.empty(); + + if ( getKeyExpr != 0 ) + delete getKeyExpr; + if ( accessExpr != 0 ) + delete accessExpr; + if ( prePushExpr != 0 ) + delete prePushExpr; + if ( postPopExpr != 0 ) + delete postPopExpr; + if ( nfaPrePushExpr != 0 ) + delete nfaPrePushExpr; + if ( nfaPostPopExpr != 0 ) + delete nfaPostPopExpr; + if ( pExpr != 0 ) + delete pExpr; + if ( peExpr != 0 ) + delete peExpr; + if ( eofExpr != 0 ) + delete eofExpr; + if ( csExpr != 0 ) + delete csExpr; + if ( topExpr != 0 ) + delete topExpr; + if ( stackExpr != 0 ) + delete stackExpr; + if ( actExpr != 0 ) + delete actExpr; + if ( tokstartExpr != 0 ) + delete tokstartExpr; + if ( tokendExpr != 0 ) + delete tokendExpr; + if ( dataExpr != 0 ) + delete dataExpr; + } + +protected: + /* Collected during parsing. */ + int curAction; + int curActionTable; + int curState; + + void makeKey( GenInlineList *outList, Key key ); + void makeText( GenInlineList *outList, InlineItem *item ); + void makeLmOnLast( GenInlineList *outList, InlineItem *item ); + void makeLmOnNext( GenInlineList *outList, InlineItem *item ); + void makeLmOnLagBehind( GenInlineList *outList, InlineItem *item ); + void makeLmSwitch( GenInlineList *outList, InlineItem *item ); + void makeLmNfaOnLast( GenInlineList *outList, InlineItem *item ); + void makeLmNfaOnNext( GenInlineList *outList, InlineItem *item ); + void makeLmNfaOnEof( GenInlineList *outList, InlineItem *item ); + void makeActionExec( GenInlineList *outList, InlineItem *item ); + void makeSetTokend( GenInlineList *outList, long offset ); + void makeSetAct( GenInlineList *outList, long lmId ); + void makeSubList( GenInlineList *outList, InlineList *inlineList, + GenInlineItem::Type type ); + void makeTargetItem( GenInlineList *outList, NameInst *nameTarg, + 
GenInlineItem::Type type ); + void makeExecGetTokend( GenInlineList *outList ); + void makeActionList(); + void makeAction( Action *action ); + void makeActionTableList(); + void makeConditions(); + void makeEntryPoints(); + bool makeNameInst( std::string &out, NameInst *nameInst ); + void makeStateList(); + + void makeStateActions( StateAp *state ); + void makeEofTrans( StateAp *state ); + void makeTransList( StateAp *state ); + void makeTrans( Key lowKey, Key highKey, TransAp *trans ); + void newTrans( RedStateAp *state, Key lowKey, Key highKey, RedTransAp *trans ); + + void makeSubList( GenInlineList *outList, const InputLoc &loc, + InlineList *inlineList, GenInlineItem::Type type ); + + void createMachine(); + void initActionList( unsigned long length ); + void newAction( int anum, std::string name, + const InputLoc &loc, GenInlineList *inlineList ); + void initActionTableList( unsigned long length ); + void initStateList( unsigned long length ); + void setStartState( unsigned long startState ); + void setErrorState( unsigned long errState ); + void addEntryPoint( char *name, unsigned long entryState ); + void setId( int snum, int id ); + void setFinal( int snum ); + void initTransList( int snum, unsigned long length ); + + void newTrans( int snum, int tnum, Key lowKey, Key highKey, + GenCondSpace *gcs, RedTransAp *trans ); + + void finishTransList( int snum ); + void setStateActions( int snum, long toStateAction, + long fromStateAction, long eofAction ); + void setEofTrans( int snum, long targ, long eofAction ); + void setEofTrans( int snum, GenCondSpace *condSpace, + RedCondEl *outConds, int numConds, RedCondAp *errCond ); + void setForcedErrorState() + { redFsm->forcedErrorState = true; } + + void condSpaceItem( int cnum, long condActionId ); + void newCondSpace( int cnum, int condSpaceId ); + + void initStateCondList( int snum, ulong length ); + void addStateCond( int snum, Key lowKey, Key highKey, long condNum ); + + + void resolveTargetStates( 
GenInlineList *inlineList ); + void resolveTargetStates(); + + + /* Gather various info on the machine. */ + void analyzeActionList( RedAction *redAct, GenInlineList *inlineList ); + void analyzeAction( GenAction *act, GenInlineList *inlineList ); + void actionActionRefs( RedAction *action ); + void transListActionRefs( RedTransList &list ); + void transActionRefs( RedTransAp *trans ); + void findFinalActionRefs(); + + void setValueLimits(); + void assignActionIds(); + + + void appendTrans( TransListVect &outList, Key lowKey, Key highKey, TransAp *trans ); + void reduceActionTables(); + +public: + + Key findMaxKey(); + void makeMachine(); + void makeExports(); + void makeGenInlineList( GenInlineList *outList, InlineList *inList ); + bool setAlphType( const HostLang *hostLang, const char *data ); + void analyzeMachine(); + void make( const HostLang *hostLang, const HostType *alphType ); + + /* + * Collecting the machine. + */ + + RedFsmAp *redFsm; + GenAction *allActions; + RedAction *allActionTables; + Condition *allConditions; + GenCondSpace *allCondSpaces; + RedStateAp *allStates; + NameInst **nameIndex; + int startState; + int errState; + GenActionList actionList; + CondSpaceList condSpaceList; + + GenInlineList *getKeyExpr; + GenInlineList *accessExpr; + GenInlineExpr *prePushExpr; + GenInlineExpr *postPopExpr; + + GenInlineExpr *nfaPrePushExpr; + GenInlineExpr *nfaPostPopExpr; + + /* Overriding variables. 
*/ + GenInlineList *pExpr; + GenInlineList *peExpr; + GenInlineList *eofExpr; + GenInlineList *csExpr; + GenInlineList *topExpr; + GenInlineList *stackExpr; + GenInlineList *actExpr; + GenInlineList *tokstartExpr; + GenInlineList *tokendExpr; + GenInlineList *dataExpr; + + EntryIdVect entryPointIds; + EntryNameVect entryPointNames; + bool hasLongestMatch; + ExportList exportList; + Action *curInlineAction; +}; + +struct CodeGenArgs +{ + CodeGenArgs( FsmGbl *id, Reducer *red, HostType *alphType, + int machineId, std::string sourceFileName, + std::string fsmName, std::ostream &out, + CodeStyle codeStyle ) + : + id(id), + red(red), + alphType(alphType), + machineId(machineId), + sourceFileName(sourceFileName), + fsmName(fsmName), + out(out), + codeStyle(codeStyle), + lineDirectives(true), + forceVar(false), + loopLabels(false) + {} + + FsmGbl *id; + Reducer *red; + HostType *alphType; + int machineId; + std::string sourceFileName; + std::string fsmName; + std::ostream &out; + CodeStyle codeStyle; + bool lineDirectives; + GenLineDirectiveT genLineDirective; + bool forceVar; + bool loopLabels; +}; + +struct CodeGenData +{ + CodeGenData( const CodeGenArgs &args ) + : + red(args.red), + redFsm(args.red->redFsm), + sourceFileName(args.sourceFileName), + fsmName(args.fsmName), + keyOps(red->keyOps), + alphType(args.alphType), + out(args.out), + noEnd(false), + noPrefix(false), + noFinal(false), + noError(false), + noCS(false), + lineDirectives(args.lineDirectives), + cleared(false), + referencesCollected(false), + genLineDirective(args.id->hostLang->genLineDirective) + { + } + + /* + * The interface to the code generator. + */ + virtual void genAnalysis() = 0; + + /* These are invoked by writeStatement and are normally what are used to + * implement the code generators. 
*/ + virtual void writeData() {}; + virtual void writeInit() {}; + virtual void writeExec() {}; + virtual void writeExports() {}; + virtual void writeStart() {}; + virtual void writeFirstFinal() {}; + virtual void writeError() {}; + virtual void writeClear(); + + /* Show some stats after a write data. */ + virtual void statsSummary() = 0; + + /* This can also be overridden to modify the processing of write + * statements. */ + virtual void writeStatement( InputLoc &loc, int nargs, + std::vector<std::string> &args, bool generateDot, const HostLang *hostLang ); + + /********************/ + + virtual ~CodeGenData() + { + } + + void clear() + { + delete red->redFsm; + red->redFsm = 0; + } + + void collectReferences(); + +protected: + + Reducer *red; + RedFsmAp *redFsm; + std::string sourceFileName; + std::string fsmName; + KeyOps *keyOps; + HostType *alphType; + ostream &out; + + /* Write options. */ + bool noEnd; + bool noPrefix; + bool noFinal; + bool noError; + bool noCS; + + void write_option_error( InputLoc &loc, std::string arg ); + + bool lineDirectives; + bool cleared; + + bool referencesCollected; + + void genOutputLineDirective( std::ostream &out ) const; + GenLineDirectiveT genLineDirective; +}; + +/* Selects and constructs the codegen based on the output options. 
*/ +CodeGenData *makeCodeGen( const HostLang *hostLang, const CodeGenArgs &args ); +CodeGenData *asm_makeCodeGen( const HostLang *hostLang, const CodeGenArgs &args ); + +typedef AvlMap<char *, CodeGenData*, CmpStr> CodeGenMap; +typedef AvlMapEl<char *, CodeGenData*> CodeGenMapEl; + +#endif diff --git a/libfsm/goto.cc b/libfsm/goto.cc new file mode 100644 index 00000000..610f44d1 --- /dev/null +++ b/libfsm/goto.cc @@ -0,0 +1,978 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ragel.h" +#include "goto.h" +#include "redfsm.h" +#include "bstmap.h" +#include "gendata.h" + +#include <sstream> + +using std::ostringstream; + +IpLabel *Goto::allocateLabels( IpLabel *labels, IpLabel::Type type, int n ) +{ + if ( labels == 0 ) { + labels = new IpLabel[n]; + for ( int id = 0; id < n; id++ ) { + labels[id].type = type; + labels[id].stid = id; + } + } + + return labels; +} + +void Goto::setTableState( TableArray::State state ) +{ + for ( ArrayVector::Iter i = arrayVector; i.lte(); i++ ) { + TableArray *tableArray = *i; + tableArray->setState( state ); + } +} + +/* Emit the goto to take for a given transition. */ +std::ostream &Goto::COND_GOTO( RedCondPair *cond ) +{ + out << "goto " << ctrLabel[cond->id].reference() << ";"; + return out; +} + +/* Emit the goto to take for a given transition. */ +std::ostream &Goto::TRANS_GOTO( RedTransAp *trans ) +{ + if ( trans->condSpace == 0 || trans->condSpace->condSet.length() == 0 ) { + /* Existing. */ + assert( trans->numConds() == 1 ); + RedCondPair *cond = trans->outCond( 0 ); + + /* Go to the transition which will go to the state. */ + out << "goto " << ctrLabel[cond->id].reference() << ";"; + } + else { + out << ck << " = 0;\n"; + for ( GenCondSet::Iter csi = trans->condSpace->condSet; csi.lte(); csi++ ) { + out << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = (1 << csi.pos()); + out << " )\n" << ck << " += " << condValOffset << ";\n"; + } + CondKey lower = 0; + CondKey upper = trans->condFullSize() - 1; + COND_B_SEARCH( trans, lower, upper, 0, trans->numConds()-1 ); + + if ( trans->errCond() != 0 ) { + COND_GOTO( trans->errCond() ) << "\n"; + } + } + + return out; +} + +/* Write out the array of actions. */ +void Goto::taActions() +{ + actions.start(); + + actions.value( 0 ); + + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. 
*/ + actions.value( act->key.length() ); + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) + actions.value( item->value->actionId ); + } + + actions.finish(); +} + +void Goto::GOTO_HEADER( RedStateAp *state ) +{ + /* Label the state. */ + out << "case " << state->id << ":\n"; +} + + +void Goto::SINGLE_SWITCH( RedStateAp *state ) +{ + /* Load up the singles. */ + int numSingles = state->outSingle.length(); + RedTransEl *data = state->outSingle.data; + + if ( numSingles == 1 ) { + /* If there is a single single key then write it out as an if. */ + out << "if ( " << GET_KEY() << " == " << + KEY(data[0].lowKey) << " ) {\n"; + + /* Virtual function for writing the target of the transition. */ + TRANS_GOTO(data[0].value) << "\n"; + out << "}\n"; + } + else if ( numSingles > 1 ) { + /* Write out single keys in a switch if there is more than one. */ + out << "switch( " << GET_KEY() << " ) {\n"; + + /* Write out the single indices. */ + for ( int j = 0; j < numSingles; j++ ) { + out << "case " << KEY(data[j].lowKey) << ": {\n"; + TRANS_GOTO(data[j].value) << "\n"; + out << "}\n"; + } + + /* Close off the transition switch. */ + out << "}\n"; + } +} + +void Goto::RANGE_B_SEARCH( RedStateAp *state, Key lower, Key upper, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; + RedTransEl *data = state->outRange.data; + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet. */ + bool limitLow = keyOps->eq( data[mid].lowKey, lower ); + bool limitHigh = keyOps->eq( data[mid].highKey, upper ); + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid. 
*/ + out << "if ( " << GET_KEY() << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + RANGE_B_SEARCH( state, lower, keyOps->sub( data[mid].lowKey, 1 ), low, mid-1 ); + out << "} else if ( " << GET_KEY() << " > " << + KEY(data[mid].highKey) << " ) {\n"; + RANGE_B_SEARCH( state, keyOps->add( data[mid].highKey, 1 ), upper, mid+1, high ); + out << "} else {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. */ + out << "if ( " << GET_KEY() << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + RANGE_B_SEARCH( state, lower, keyOps->sub( data[mid].lowKey, 1 ), low, mid-1 ); + + /* if the higher is the highest in the alphabet then there is no + * sense testing it. */ + if ( limitHigh ) { + out << "} else {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + else { + out << "} else if ( " << GET_KEY() << " <= " << + KEY(data[mid].highKey) << " ) {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. */ + out << "if ( " << GET_KEY() << " > " << + KEY(data[mid].highKey) << " ) {\n"; + RANGE_B_SEARCH( state, keyOps->add( data[mid].highKey, 1 ), upper, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( limitLow ) { + out << "} else {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + else { + out << "} else if ( " << GET_KEY() << " >= " << + KEY(data[mid].lowKey) << " ) {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. 
*/ + if ( !limitLow && !limitHigh ) { + out << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_KEY() << " && " << GET_KEY() << " <= " << + KEY(data[mid].highKey) << " ) {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + else if ( limitLow && !limitHigh ) { + out << "if ( " << GET_KEY() << " <= " << + KEY(data[mid].highKey) << " ) {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + else if ( !limitLow && limitHigh ) { + out << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_KEY() << " ) {\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + out << "{\n"; + TRANS_GOTO(data[mid].value) << "\n"; + out << "}\n"; + } + } +} + +/* Write out a key from the fsm code gen. Depends on wether or not the key is + * signed. */ +string Goto::CKEY( CondKey key ) +{ + ostringstream ret; + ret << key.getVal(); + return ret.str(); +} + +void Goto::COND_B_SEARCH( RedTransAp *trans, CondKey lower, + CondKey upper, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; +// RedCondEl *data = trans->outCond(0); + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + CondKey midKey = trans->outCondKey( mid ); + RedCondPair *midTrans = trans->outCond( mid ); + + /* Determine if the keys at mid are the limits of the alphabet. */ + bool limitLow = midKey == lower; + bool limitHigh = midKey == upper; + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid. 
*/ + out << "if ( " << ck << " < " << + CKEY(midKey) << " ) {\n"; + COND_B_SEARCH( trans, lower, midKey-1, low, mid-1 ); + out << "} else if ( " << ck << " > " << + CKEY(midKey) << " ) {\n"; + COND_B_SEARCH( trans, midKey+1, upper, mid+1, high ); + out << "} else {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. */ + out << "if ( " << ck << " < " << + CKEY(midKey) << " ) {\n"; + COND_B_SEARCH( trans, lower, midKey-1, low, mid-1); + + /* if the higher is the highest in the alphabet then there is no + * sense testing it. */ + if ( limitHigh ) { + out << "} else {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + else { + out << "} else if ( " << ck << " <= " << + CKEY(midKey) << " ) {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. */ + out << "if ( " << ck << " > " << + CKEY(midKey) << " ) {\n"; + COND_B_SEARCH( trans, midKey+1, upper, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( limitLow ) { + out << "} else {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + else { + out << "} else if ( " << ck << " >= " << + CKEY(midKey) << " ) {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. 
*/ + if ( !limitLow && !limitHigh ) { + out << "if ( " << ck << " == " << + CKEY(midKey) << " ) {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + else if ( limitLow && !limitHigh ) { + out << "if ( " << ck << " <= " << + CKEY(midKey) << " ) {\n"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + else if ( !limitLow && limitHigh ) { + out << "if ( " << CKEY(midKey) << " <= " << ck << " )\n {"; + COND_GOTO(midTrans) << "\n"; + out << "}\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + COND_GOTO(midTrans) << "\n"; + } + } +} + +void Goto::STATE_GOTO_ERROR() +{ + /* Bail out immediately. */ + out << " goto " << _again << ";\n"; +} + +void Goto::FROM_STATE_ACTION_EMIT( RedStateAp *state ) +{ + if ( state->fromStateAction != 0 ) { + /* Write every action in the list. */ + for ( GenActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( state->id, false, + state->fromStateAction->anyNextStmt() ) ); + out << "\n"; + } + } +} + +std::ostream &Goto::STATE_CASES() +{ + bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Writing code above state gotos. */ + GOTO_HEADER( st ); + + FROM_STATE_ACTION_EMIT( st ); + + if ( !noEnd && eof ) { + out << + "if ( " << P() << " == " << vEOF() << " ) {\n"; + + if ( st->eofTrans != 0 ) + TRANS_GOTO( st->eofTrans ); + + out << + " goto " << _again << ";\n" + "}\n" + "else {\n"; + } + + if ( st == redFsm->errState ) + STATE_GOTO_ERROR(); + else { + /* Try singles. */ + if ( st->outSingle.length() > 0 ) + SINGLE_SWITCH( st ); + + /* Default case is to binary search for the ranges, if that fails then */ + if ( st->outRange.length() > 0 ) { + RANGE_B_SEARCH( st, keyOps->minKey, keyOps->maxKey, + 0, st->outRange.length() - 1 ); + } + + /* Write the default transition. 
*/ + TRANS_GOTO( st->defTrans ) << "\n"; + } + + if ( !noEnd && eof ) { + out << + "}\n"; + } + } + return out; +} + +std::ostream &Goto::TRANSITION( RedCondPair *pair ) +{ + /* Write the label for the transition so it can be jumped to. */ + if ( ctrLabel[pair->id].isReferenced ) + out << "_ctr" << pair->id << ": "; + + /* Destination state. */ + if ( pair->action != 0 && pair->action->anyCurStateRef() ) + out << ps << " = " << vCS() << ";"; + out << vCS() << " = " << pair->targ->id << "; "; + + if ( pair->action != 0 ) { + /* Write out the transition func. */ + out << "goto f" << pair->action->actListId << ";\n"; + } + else { + /* No code to execute, just loop around. */ + out << "goto " << _again << ";\n"; + } + return out; +} + +std::ostream &Goto::TRANSITIONS() +{ + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + if ( trans->condSpace == 0 ) + TRANSITION( &trans->p ); + } + + for ( CondApSet::Iter cond = redFsm->condSet; cond.lte(); cond++ ) + TRANSITION( &cond->p ); + + return out; +} + +unsigned int Goto::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + return act; +} + +unsigned int Goto::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + return act; +} + +unsigned int Goto::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->location+1; + return act; +} + +void Goto::taToStateActions() +{ + toStateActions.start(); + + /* Take one off for the psuedo start state. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = TO_STATE_ACTION(st); + + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write any eof action. 
*/ + toStateActions.value( vals[st] ); + } + delete[] vals; + + toStateActions.finish(); +} + +void Goto::taFromStateActions() +{ + fromStateActions.start(); + + /* Take one off for the psuedo start state. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = FROM_STATE_ACTION(st); + + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write any eof action. */ + fromStateActions.value( vals[st] ); + } + delete[] vals; + + fromStateActions.finish(); +} + +void Goto::taEofActions() +{ + eofActions.start(); + + /* Take one off for the psuedo start state. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = EOF_ACTION(st); + + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write any eof action. */ + eofActions.value( vals[st] ); + } + delete[] vals; + + eofActions.finish(); +} + +void Goto::taNfaOffsets() +{ + nfaOffsets.start(); + + /* Offset of zero means no NFA targs, real targs start at 1. */ + long offset = 1; + + /* Take one off for the psuedo start state. 
*/ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs == 0 ) { + vals[st->id] = 0; + //nfaOffsets.value( 0 ); + } + else { + vals[st->id] = offset; + //nfaOffsets.value( offset ); + offset += 1 + st->nfaTargs->length(); + } + } + + for ( int st = 0; st < redFsm->nextStateId; st++ ) + nfaOffsets.value( vals[st] ); + delete[] vals; + + nfaOffsets.finish(); +} + +void Goto::taNfaTargs() +{ + nfaTargs.start(); + + /* Offset of zero means no NFA targs, put a filler there. */ + nfaTargs.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaTargs.value( st->nfaTargs->length() ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + nfaTargs.value( targ->state->id ); + } + } + + nfaTargs.finish(); +} + +/* These need to mirror nfa targs. 
*/ +void Goto::taNfaPushActions() +{ + nfaPushActions.start(); + + nfaPushActions.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaPushActions.value( 0 ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_PUSH_ACTION( targ ); + } + } + + nfaPushActions.finish(); +} + +void Goto::taNfaPopTrans() +{ + nfaPopTrans.start(); + + nfaPopTrans.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaPopTrans.value( 0 ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_POP_TEST( targ ); + } + } + + nfaPopTrans.finish(); +} + +void Goto::EOF_CHECK( ostream &ret ) +{ + ret << + " if ( " << P() << " == " << PE() << " )\n" + " goto " << _test_eof << ";\n"; +} + +void Goto::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << gotoDest << "; "; + + if ( inFinish && !noEnd ) + EOF_CHECK( ret ); + + ret << "goto " << _again << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void Goto::GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";"; + + if ( inFinish && !noEnd ) + EOF_CHECK( ret ); + + ret << " goto " << _again << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void Goto::CURS( ostream &ret, bool inFinish ) +{ + ret << "(" << ps << ")"; +} + +void Goto::TARGS( ostream &ret, bool inFinish, int targState ) +{ + ret << "(" << vCS() << ")"; +} + +void Goto::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << vCS() << " = " << nextDest << ";"; +} + +void Goto::NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << vCS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << ");"; +} + +void Goto::CALL( ostream &ret, int callDest, int targState, 
bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << + TOP() << " += 1;" << vCS() << " = " << + callDest << ";"; + + if ( inFinish && !noEnd ) + EOF_CHECK( ret ); + + ret << " goto " << _again << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void Goto::NCALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << + TOP() << " += 1;" << vCS() << " = " << + callDest << "; " << CLOSE_GEN_BLOCK(); +} + +void Goto::CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << TOP() << " += 1;" << + vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, targState, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";"; + + if ( inFinish && !noEnd ) + EOF_CHECK( ret ); + + ret << " goto " << _again << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void Goto::NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << TOP() << " += 1;" << + vCS() << " = " << 
OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, targState, inFinish, false ); + ret << CLOSE_HOST_EXPR() << "; " << CLOSE_GEN_BLOCK(); +} + +void Goto::RET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << "-= 1;" << vCS() << " = " << STACK() << "[" << TOP() << "];"; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + if ( inFinish && !noEnd ) + EOF_CHECK( ret ); + + ret << "goto " << _again << ";" << CLOSE_GEN_BLOCK(); +} + +void Goto::NRET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << "-= 1;" << vCS() << " = " << STACK() << "[" << TOP() << "];"; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << CLOSE_GEN_BLOCK(); +} + +void Goto::BREAK( ostream &ret, int targState, bool csForced ) +{ + ret << OPEN_GEN_BLOCK() << P() << " += 1; " << "goto " << _out << "; " << CLOSE_GEN_BLOCK(); +} + +void Goto::NBREAK( ostream &ret, int targState, bool csForced ) +{ + ret << OPEN_GEN_BLOCK() << P() << " += 1; " << nbreak << " = 1; " << CLOSE_GEN_BLOCK(); +} + +void Goto::tableDataPass() +{ + if ( type == Loop ) + taActions(); + + taToStateActions(); + taFromStateActions(); + taEofActions(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); +} + +void Goto::genAnalysis() +{ + /* For directly executable machines there is no required state + * ordering. Choose a depth-first ordering to increase the + * potential for fall-throughs. */ + redFsm->depthFirstOrdering(); + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Choose single. */ + redFsm->moveSelectTransToSingle(); + + /* If any errors have occured in the input file then don't write anything. 
*/ + if ( red->id->errorCount > 0 ) + return; + + /* Anlayze Machine will find the final action reference counts, among other + * things. We will use these in reporting the usage of fsm directives in + * action code. */ + red->analyzeMachine(); + + /* Run the analysis pass over the table data. */ + setTableState( TableArray::AnalyzePass ); + tableDataPass(); + + /* Switch the tables over to the code gen mode. */ + setTableState( TableArray::GeneratePass ); +} + +void Goto::writeData() +{ + if ( type == Loop ) { + if ( redFsm->anyActions() ) + taActions(); + } + + if ( redFsm->anyToStateActions() ) + taToStateActions(); + + if ( redFsm->anyFromStateActions() ) + taFromStateActions(); + + if ( redFsm->anyEofActions() ) + taEofActions(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); + + STATE_IDS(); +} + +void Goto::writeExec() +{ + int maxCtrId = redFsm->nextCondId > redFsm->nextTransId ? redFsm->nextCondId : redFsm->nextTransId; + ctrLabel = allocateLabels( ctrLabel, IpLabel::Ctr, maxCtrId ); + + out << "{\n"; + + DECLARE( INT(), cpc ); + DECLARE( INT(), ck ); + DECLARE( INT(), pop_test ); + DECLARE( INT(), nbreak ); + DECLARE( INT(), ps, " = 0" ); + DECLARE( INT(), new_recs ); + DECLARE( INT(), alt ); + DECLARE( INDEX( ARR_TYPE( actions ) ), acts ); + DECLARE( UINT(), nacts ); + + out << "\n"; + + out << EMIT_LABEL( _resume ); + + /* Do we break out on no more input. 
*/ + bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates(); + if ( !noEnd ) { + if ( eof ) { + out << + " if ( " << P() << " == " << PE() << " && " << P() << " != " << vEOF() << " )\n" + " goto " << _out << ";\n"; + } + else { + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto " << _out << ";\n"; + } + } + + NFA_PUSH( vCS() ); + + out << + " switch ( " << vCS() << " ) {\n"; + STATE_CASES() << + " }\n" + "\n"; + TRANSITIONS() << + "\n"; + + if ( redFsm->anyRegActions() ) + EXEC_FUNCS() << "\n"; + + out << EMIT_LABEL( _again ); + + if ( !noEnd && eof ) { + out << + " if ( " << P() << " == " << vEOF() << " ) {\n" + " if ( " << vCS() << " >= " << FIRST_FINAL_STATE() << " )\n" + " goto " << _out << ";\n" + " }\n" + " else {\n"; + } + + TO_STATE_ACTIONS(); + + if ( redFsm->errState != 0 ) { + out << + " if ( " << vCS() << " != " << redFsm->errState->id << " ) {\n"; + } + + out << + " " << P() << " += 1;\n" + " goto " << _resume << ";\n"; + + if ( redFsm->errState != 0 ) { + out << + " }\n"; + } + + if ( !noEnd && eof ) { + out << + " }\n"; + } + + if ( redFsm->anyNfaStates() ) { + out << + " if ( nfa_len == 0 )\n" + " goto " << _out << ";\n" + "\n" + " nfa_count += 1;\n" + " nfa_len -= 1;\n" + " " << P() << " = nfa_bp[nfa_len].p;\n" + ; + + if ( redFsm->bAnyNfaPops ) { + NFA_FROM_STATE_ACTION_EXEC(); + + NFA_POP_TEST_EXEC(); + + out << + " if ( " << pop_test << " )\n" + " " << vCS() << " = nfa_bp[nfa_len].state;\n" + " else\n" + " " << vCS() << " = " << ERROR_STATE() << ";\n"; + } + else { + out << + " " << vCS() << " = nfa_bp[nfa_len].state;\n"; + + } + + NFA_POST_POP(); + + out << "goto " << _resume << ";\n"; + } + + out << EMIT_LABEL( _out ); + + out << "}\n"; +} diff --git a/libfsm/goto.h b/libfsm/goto.h new file mode 100644 index 00000000..dcf13448 --- /dev/null +++ b/libfsm/goto.h @@ -0,0 +1,226 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy 
+ * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _C_GOTO_H +#define _C_GOTO_H + +#include <iostream> +#include "codegen.h" + +/* Forwards. */ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; +struct GenStateCond; + +struct IpLabel +{ + IpLabel() + : + type(None), + stid(0), + isReferenced(false) + {} + + enum Type + { + None = 1, + TestEof, + Ctr, + St, + Out, + Pop + }; + + std::string reference() + { + isReferenced = true; + return define(); + } + + std::string define() + { + std::stringstream ss; + switch ( type ) { + case None: break; + case TestEof: + ss << "_test_eof" << stid; + break; + case Ctr: + ss << "_ctr" << stid; + break; + case St: + ss << "_st" << stid; + break; + case Out: + ss << "_out" << stid; + break; + case Pop: + ss << "_pop" << stid; + break; + } + + return ss.str(); + } + + Type type; + int stid; + bool isReferenced; +}; + + +/* + * Goto driven fsm. 
+ */ +class Goto + : public CodeGen +{ +public: + enum Type { + Loop = 1, + Exp, + Ip + }; + + Goto( const CodeGenArgs &args, Type type ) + : + CodeGen( args ), + type(type), + acts( "_acts" ), + nacts( "_nacts" ), + ck( "_ck" ), + nbreak( "_nbreak" ), + ps( "_ps" ), + _out("_out"), + _pop("_pop"), + _again("_again"), + _resume("_resume"), + _test_eof("_test_eof"), + actions( "actions", *this ), + toStateActions( "to_state_actions", *this ), + fromStateActions( "from_state_actions", *this ), + eofActions( "eof_actions", *this ), + ctrLabel(0) + {} + + void tableDataPass(); + virtual void genAnalysis(); + virtual void writeData(); + virtual void writeExec(); + + std::ostream &TRANSITION( RedCondPair *pair ); + + void FROM_STATE_ACTION_EMIT( RedStateAp *state ); + + std::ostream &STATE_CASES(); + std::ostream &TRANSITIONS(); + + Type type; + + Variable acts; + Variable nacts; + Variable ck; + Variable nbreak; + Variable ps; + + GotoLabel _out; + GotoLabel _pop; + GotoLabel _again; + GotoLabel _resume; + GotoLabel _test_eof; + + TableArray actions; + TableArray toStateActions; + TableArray fromStateActions; + TableArray eofActions; + + IpLabel *ctrLabel; + + void taActions(); + void taToStateActions(); + void taFromStateActions(); + void taEofActions(); + void taNfaTargs(); + void taNfaOffsets(); + void taNfaPushActions(); + void taNfaPopTrans(); + + void EOF_CHECK( ostream &ret ); + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NCALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NEXT( ostream &ret, int nextDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void CURS( 
ostream &ret, bool inFinish ); + void TARGS( ostream &ret, bool inFinish, int targState ); + void RET( ostream &ret, bool inFinish ); + void NRET( ostream &ret, bool inFinish ); + void BREAK( ostream &ret, int targState, bool csForced ); + void NBREAK( ostream &ret, int targState, bool csForced ); + + virtual unsigned int TO_STATE_ACTION( RedStateAp *state ); + virtual unsigned int FROM_STATE_ACTION( RedStateAp *state ); + virtual unsigned int EOF_ACTION( RedStateAp *state ); + + virtual std::ostream &EXEC_FUNCS() = 0; + virtual std::ostream &TO_STATE_ACTION_SWITCH() = 0; + virtual std::ostream &FROM_STATE_ACTION_SWITCH() = 0; + virtual std::ostream &EOF_ACTION_SWITCH() = 0; + + std::ostream &ACTIONS_ARRAY(); + + void setTableState( TableArray::State ); + + virtual std::ostream &COND_GOTO( RedCondPair *trans ); + + string CKEY( CondKey key ); + void COND_B_SEARCH( RedTransAp *trans, CondKey lower, CondKey upper, int low, int high); + + virtual std::ostream &TRANS_GOTO( RedTransAp *trans ); + + void SINGLE_SWITCH( RedStateAp *state ); + void RANGE_B_SEARCH( RedStateAp *state, Key lower, Key upper, int low, int high ); + + /* Called from STATE_GOTOS just before writing the gotos */ + virtual void GOTO_HEADER( RedStateAp *state ); + virtual void STATE_GOTO_ERROR(); + + virtual void NFA_PUSH_ACTION( RedNfaTarg *targ ) = 0; + virtual void NFA_POP_TEST( RedNfaTarg *targ ) {} + virtual void NFA_FROM_STATE_ACTION_EXEC() = 0; + + void NFA_POP() {} + + virtual void FROM_STATE_ACTIONS() = 0; + virtual void TO_STATE_ACTIONS() = 0; + virtual void REG_ACTIONS() = 0; + virtual void EOF_ACTIONS() = 0; + + IpLabel *allocateLabels( IpLabel *labels, IpLabel::Type type, int n ); +}; + +#endif diff --git a/libfsm/gotoexp.cc b/libfsm/gotoexp.cc new file mode 100644 index 00000000..dea9029c --- /dev/null +++ b/libfsm/gotoexp.cc @@ -0,0 +1,208 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a 
copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "gotoexp.h" +#include "redfsm.h" +#include "gendata.h" +#include "bstmap.h" +#include "parsedata.h" +#include "inputdata.h" + +std::ostream &GotoExp::EXEC_FUNCS() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numTransRefs > 0 ) { + /* We are at the start of a glob, write the case. */ + out << "f" << redAct->actListId << ":\n"; + + if ( redFsm->anyRegNbreak() ) + out << nbreak << " = 0;\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, IlOpts( 0, false, false ) ); + + if ( redFsm->anyRegNbreak() ) { + out << + " if ( " << nbreak << " == 1 )\n" + " goto " << _out << ";\n"; + } + + + out << "goto " << _again << ";\n"; + } + } + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. 
*/ +std::ostream &GotoExp::TO_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numToStateRefs > 0 ) { + /* Write the entry label. */ + out << "\t" << CASE( STR( redAct->actListId+1 ) ) << "{\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, IlOpts( 0, false, false ) ); + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &GotoExp::FROM_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numFromStateRefs > 0 ) { + /* Write the entry label. */ + out << "\t" << CASE( STR( redAct->actListId+1 ) ) << "{\n"; + + /* Write each action in the list of action items. */ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, IlOpts( 0, false, false ) ); + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &GotoExp::EOF_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numEofRefs > 0 ) { + /* Write the entry label. */ + out << "\t" << CASE( STR( redAct->actListId+1 ) ) << "{\n"; + + /* Write each action in the list of action items. 
*/ + for ( GenActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, IlOpts( 0, true, false ) ); + + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +unsigned int GotoExp::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->actListId+1; + return act; +} + +unsigned int GotoExp::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->actListId+1; + return act; +} + +unsigned int GotoExp::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->actListId+1; + return act; +} + +void GotoExp::NFA_PUSH_ACTION( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->push != 0 ) + act = targ->push->actListId+1; + nfaPushActions.value( act ); +} + +void GotoExp::NFA_POP_TEST( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->popTest != 0 ) + act = targ->popTest->actListId+1; + nfaPopTrans.value( act ); +} + + +void GotoExp::NFA_FROM_STATE_ACTION_EXEC() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " switch ( " << ARR_REF( fromStateActions ) << "[nfa_bp[nfa_len].state] ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + "\n"; + } +} + +void GotoExp::FROM_STATE_ACTIONS() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " switch ( " << ARR_REF( fromStateActions ) << "[" << vCS() << "] ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + "\n"; + } +} + +void GotoExp::TO_STATE_ACTIONS() +{ + if ( redFsm->anyToStateActions() ) { + out << + " switch ( " << ARR_REF( toStateActions ) << "[" << vCS() << "] ) {\n"; + TO_STATE_ACTION_SWITCH() << + " }\n" + "\n"; + } +} + +void GotoExp::REG_ACTIONS() +{ + +} + +void GotoExp::EOF_ACTIONS() +{ + if ( redFsm->anyEofActions() ) { + out << + " switch ( " << ARR_REF( eofActions ) << "[" << vCS() << "] ) {\n"; + EOF_ACTION_SWITCH() << + " }\n"; + } + +} diff --git a/libfsm/gotoexp.h b/libfsm/gotoexp.h 
new file mode 100644 index 00000000..ddb3f138 --- /dev/null +++ b/libfsm/gotoexp.h @@ -0,0 +1,75 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef SWITCH_GOTO_EXP_H +#define SWITCH_GOTO_EXP_H + +#include <iostream> +#include "goto.h" + +/* Forwards. 
*/ +struct CodeGenData; + +/* + * class GotoExp + */ +class GotoExp + : public Goto +{ +public: + GotoExp( const CodeGenArgs &args ) + : Goto(args, Exp) {} + + virtual std::ostream &EXEC_FUNCS(); + virtual std::ostream &TO_STATE_ACTION_SWITCH(); + virtual std::ostream &FROM_STATE_ACTION_SWITCH(); + virtual std::ostream &EOF_ACTION_SWITCH(); + + unsigned int TO_STATE_ACTION( RedStateAp *state ); + unsigned int FROM_STATE_ACTION( RedStateAp *state ); + unsigned int EOF_ACTION( RedStateAp *state ); + + virtual void NFA_PUSH_ACTION( RedNfaTarg *targ ); + virtual void NFA_POP_TEST( RedNfaTarg *targ ); + virtual void NFA_FROM_STATE_ACTION_EXEC(); + + virtual void FROM_STATE_ACTIONS(); + virtual void TO_STATE_ACTIONS(); + virtual void REG_ACTIONS(); + virtual void EOF_ACTIONS(); +}; + +namespace C +{ + class GotoExp + : + public ::GotoExp + { + public: + GotoExp( const CodeGenArgs &args ) + : ::GotoExp( args ) + {} + }; +} + + +#endif diff --git a/libfsm/gotoloop.cc b/libfsm/gotoloop.cc new file mode 100644 index 00000000..41a8cd8a --- /dev/null +++ b/libfsm/gotoloop.cc @@ -0,0 +1,227 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "gotoloop.h" +#include "redfsm.h" +#include "bstmap.h" +#include "gendata.h" +#include "parsedata.h" +#include "inputdata.h" + +std::ostream &GotoLoop::ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numTransRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t" << CASE( STR( act->actionId ) ) << "{\n"; + ACTION( out, act, IlOpts( 0, false, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &GotoLoop::EOF_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numEofRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t" << CASE( STR( act->actionId ) ) << "{\n"; + ACTION( out, act, IlOpts( 0, true, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &GotoLoop::FROM_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numFromStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t" << CASE( STR( act->actionId ) ) << "{\n"; + ACTION( out, act, IlOpts( 0, false, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +std::ostream &GotoLoop::TO_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. 
*/ + for ( GenActionList::Iter act = red->actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numToStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\t" << CASE( STR( act->actionId ) ) << "{\n"; + ACTION( out, act, IlOpts( 0, false, false ) ); + out << "\n\t" << CEND() << "\n}\n"; + } + } + + return out; +} + +void GotoLoop::NFA_PUSH_ACTION( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->push != 0 ) + act = targ->push->actListId+1; + nfaPushActions.value( act ); +} + +void GotoLoop::NFA_POP_TEST( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->popTest != 0 ) + act = targ->popTest->actListId+1; + nfaPopTrans.value( act ); +} + +std::ostream &GotoLoop::EXEC_FUNCS() +{ + /* Make labels that set acts and jump to execFuncs. Loop func indices. */ + for ( GenActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numTransRefs > 0 ) { + out << " f" << redAct->actListId << ": " << + "" << acts << " = " << OFFSET( ARR_REF( actions ), itoa( redAct->location+1 ) ) << ";" + " goto execFuncs;\n"; + } + } + + out << + "\n" + "execFuncs:\n"; + + if ( redFsm->anyRegNbreak() ) + out << nbreak << " = 0;\n"; + + out << + " " << nacts << " = " << CAST( UINT() ) << DEREF( ARR_REF( actions ), "" + acts.ref() + "" ) << ";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "" + acts.ref() + "" ) << " ) {\n"; + ACTION_SWITCH() << + " }\n" + " " << acts << " += 1;\n" + " " << nacts << " -= 1;\n" + " }\n" + "\n"; + + if ( redFsm->anyRegNbreak() ) { + out << + " if ( " << nbreak << " == 1 )\n" + " goto " << _out << ";\n"; + } + + out << + " goto _again;\n"; + return out; +} + +void GotoLoop::NFA_FROM_STATE_ACTION_EXEC() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), ARR_REF( fromStateActions ) + "[nfa_bp[nfa_len].state]" ) << ";\n" + " " << nacts << " = " << CAST( 
UINT() ) << DEREF( ARR_REF( actions ), "" + acts.ref() + "" ) << ";\n" + " " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "" + acts.ref() + "" ) << " ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + " " << nacts << " -= 1;\n" + " " << acts << " += 1;\n" + " }\n" + "\n"; + } +} + +void GotoLoop::FROM_STATE_ACTIONS() +{ + if ( redFsm->anyFromStateActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), + ARR_REF( fromStateActions ) + "[" + vCS() + "]" ) << ";\n" + " " << nacts << " = " << CAST( UINT() ) << DEREF( ARR_REF( actions ), "" + acts.ref() + "" ) << "; " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "" + acts.ref() + "" ) << " ) {\n"; + FROM_STATE_ACTION_SWITCH() << + " }\n" + " " << acts << " += 1;\n" + " " << nacts << " -= 1;\n" + " }\n" + "\n"; + } +} + +void GotoLoop::TO_STATE_ACTIONS() +{ + if ( redFsm->anyToStateActions() ) { + out << + " " << acts << " = " << OFFSET( ARR_REF( actions ), + ARR_REF( toStateActions ) + "[" + vCS() + "]" ) << ";\n" + " " << nacts << " = " << CAST( UINT() ) << DEREF( ARR_REF( actions ), "" + acts.ref() + "" ) << "; " << acts << " += 1;\n" + " while ( " << nacts << " > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "" + acts.ref() + "" ) << " ) {\n"; + TO_STATE_ACTION_SWITCH() << + " }\n" + " " << acts << " += 1;\n" + " " << nacts << " -= 1;\n" + " }\n" + "\n"; + } +} + +void GotoLoop::REG_ACTIONS() +{ +} + +void GotoLoop::EOF_ACTIONS() +{ + if ( redFsm->anyEofActions() ) { + out << + " " << INDEX( ARR_TYPE( actions ), "__acts" ) << ";\n" + " " << UINT() << " __nacts;\n" + " __acts = " << OFFSET( ARR_REF( actions ), + ARR_REF( eofActions ) + "[" + vCS() + "]" ) << ";\n" + " __nacts = " << CAST( UINT() ) << DEREF( ARR_REF( actions ), "__acts" ) << "; __acts += 1;\n" + " while ( __nacts > 0 ) {\n" + " switch ( " << DEREF( ARR_REF( actions ), "__acts" ) << " ) {\n"; + 
EOF_ACTION_SWITCH() << + " }\n" + " __acts += 1;\n" + " __nacts -= 1;\n" + " }\n"; + } +} diff --git a/libfsm/gotoloop.h b/libfsm/gotoloop.h new file mode 100644 index 00000000..68c43ce2 --- /dev/null +++ b/libfsm/gotoloop.h @@ -0,0 +1,72 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef SWITCH_GOTO_LOOP_H +#define SWITCH_GOTO_LOOP_H + +#include <iostream> +#include "goto.h" + +/* Forwards. 
/* Code generator derived from Goto and constructed with the Loop selector.
 * The overrides declared here emit the action switches, the looped action
 * execution code (EXEC_FUNCS and the from-state, to-state and EOF action
 * loops) and the NFA push/pop table entries. */
class GotoLoop
	: public Goto
{
public:
	/* Forward the generator arguments to Goto, selecting Loop. */
	GotoLoop( const CodeGenArgs &args )
		: Goto(args, Loop) {}

	/* Switch bodies and shared execution code; each returns a stream. */
	virtual std::ostream &ACTION_SWITCH();
	virtual std::ostream &EXEC_FUNCS();
	virtual std::ostream &TO_STATE_ACTION_SWITCH();
	virtual std::ostream &FROM_STATE_ACTION_SWITCH();
	virtual std::ostream &EOF_ACTION_SWITCH();

	/* NFA support. */
	virtual void NFA_PUSH_ACTION( RedNfaTarg *targ );
	virtual void NFA_POP_TEST( RedNfaTarg *targ );
	virtual void NFA_FROM_STATE_ACTION_EXEC();

	/* Per-category action execution emitters. */
	virtual void FROM_STATE_ACTIONS();
	virtual void TO_STATE_ACTIONS();
	virtual void REG_ACTIONS();
	virtual void EOF_ACTIONS();
};

namespace C
{
	/* Variant in namespace C; adds nothing beyond forwarding construction
	 * to ::GotoLoop. */
	class GotoLoop
		:
		public ::GotoLoop
	{
	public:
		GotoLoop( const CodeGenArgs &args )
			: ::GotoLoop( args )
		{}
	};
}
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "ragel.h" +#include "fsmgraph.h" +#include "parsedata.h" + +/* Error reporting format. */ +ErrorFormat errorFormat = ErrorFormatGNU; + +void FsmCtx::finalizeInstance( FsmAp *graph ) +{ + /* Resolve any labels that point to multiple states. Any labels that are + * still around are referenced only by gotos and calls and they need to be + * made into deterministic entry points. */ + graph->deterministicEntry(); + + /* + * All state construction is now complete. + */ + + /* Transfer actions from the out action tables to eof action tables. */ + for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ ) + graph->transferOutActions( *state ); + + /* Transfer global error actions. */ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) + graph->transferErrorActions( state, 0 ); + + if ( fsmGbl->wantDupsRemoved ) + graph->removeActionDups(); + + /* Remove unreachable states. There should be no dead end states. The + * subtract and intersection operators are the only places where they may + * be created and those operators clean them up. */ + graph->removeUnreachableStates(); + + /* No more fsm operations are to be done. Action ordering numbers are + * no longer of use and will just hinder minimization. Clear them. */ + graph->nullActionKeys(); + + /* Transition priorities are no longer of use. We can clear them + * because they will just hinder minimization as well. Clear them. */ + graph->clearAllPriorities(); + + if ( graph->ctx->minimizeOpt != MinimizeNone ) { + /* Minimize here even if we minimized at every op. Now that function + * keys have been cleared we may get a more minimal fsm. 
*/ + switch ( graph->ctx->minimizeLevel ) { + #ifdef TO_UPGRADE_CONDS + case MinimizeApprox: + graph->minimizeApproximate(); + break; + #endif + #ifdef TO_UPGRADE_CONDS + case MinimizeStable: + graph->minimizeStable(); + break; + #endif + case MinimizePartition1: + graph->minimizePartition1(); + break; + case MinimizePartition2: + graph->minimizePartition2(); + break; + } + } + + graph->compressTransitions(); + + createNfaActions( graph ); +} + +void FsmCtx::analyzeAction( Action *action, InlineList *inlineList ) +{ + /* FIXME: Actions used as conditions should be very constrained. */ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr || + item->type == InlineItem::Ncall || item->type == InlineItem::NcallExpr ) + { + action->anyCall = true; + } + + /* Need to recurse into longest match items. */ + if ( item->type == InlineItem::LmSwitch ) { + LongestMatch *lm = item->longestMatch; + for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) { + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + } + + if ( item->type == InlineItem::LmOnLast || + item->type == InlineItem::LmOnNext || + item->type == InlineItem::LmOnLagBehind ) + { + LongestMatchPart *lmi = item->longestMatchPart; + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + + if ( item->children != 0 ) + analyzeAction( action, item->children ); + } +} + + +/* Check actions for bad uses of fsm directives. We don't go inside longest + * match items in actions created by ragel, since we just want the user + * actions. */ +void FsmCtx::checkInlineList( Action *act, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* EOF checks. */ + if ( act->numEofRefs > 0 ) { + switch ( item->type ) { + /* Currently no checks. */ + default: + break; + } + } + + /* Recurse. 
*/ + if ( item->children != 0 ) + checkInlineList( act, item->children ); + } +} + +void FsmCtx::checkAction( Action *action ) +{ + /* Check for actions with calls that are embedded within a longest match + * machine. */ + if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) { + for ( NameInstVect::Iter ar = action->embedRoots; ar.lte(); ar++ ) { + NameInst *check = *ar; + while ( check != 0 ) { + if ( check->isLongestMatch ) { + fsmGbl->error(action->loc) << "within a scanner, fcall and fncall are permitted" + " only in pattern actions" << endl; + break; + } + check = check->parent; + } + } + } + + checkInlineList( action, action->inlineList ); +} + +void FsmCtx::analyzeGraph( FsmAp *graph ) +{ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + analyzeAction( act, act->inlineList ); + + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + /* The transition list. */ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + //if ( trans->condSpace != 0 ) { + // for ( CondSet::Iter sci = trans->condSpace->condSet; sci.lte(); sci++ ) + // (*sci)->numCondRefs += 1; + //} + + if ( trans->plain() ) { + for ( ActionTable::Iter at = trans->tdap()->actionTable; at.lte(); at++ ) + at->value->numTransRefs += 1; + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + for ( ActionTable::Iter at = cond->actionTable; at.lte(); at++ ) + at->value->numTransRefs += 1; + } + } + } + + for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) + at->value->numToStateRefs += 1; + + for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) + at->value->numFromStateRefs += 1; + + for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) + at->value->numEofRefs += 1; + + //for ( OutCondSet::Iter oci = st->outCondSet; oci.lte(); oci++ ) + // oci->action->numCondRefs += 1; + + if ( st->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *st->nfaOut; n.lte(); n++ ) { + 
for ( ActionTable::Iter ati = n->pushTable; ati.lte(); ati++ ) + ati->value->numNfaRefs += 1; + + for ( ActionTable::Iter ati = n->restoreTable; ati.lte(); ati++ ) + ati->value->numNfaRefs += 1; + + for ( ActionTable::Iter ati = n->popAction; ati.lte(); ati++ ) + ati->value->numNfaRefs += 1; + + for ( ActionTable::Iter ati = n->popTest; ati.lte(); ati++ ) + ati->value->numNfaRefs += 1; + } + } + } + + /* Can't count on cond references in transitions, since we don't refcount + * the spaces. FIXME: That would be the proper solution. */ + for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) { + for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) + (*csi)->numCondRefs += 1; + } + + /* Checks for bad usage of directives in action code. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + checkAction( act ); +} + +/* This create an action that refs the original embed roots, if the optWrap arg + * is supplied. */ +Action *FsmCtx::newNfaWrapAction( const char *name, InlineList *inlineList, Action *optWrap ) +{ + InputLoc loc; + loc.line = 1; + loc.col = 1; + loc.fileName = "NONE"; + + Action *action = new Action( loc, name, inlineList, nextCondId++ ); + + if ( optWrap != 0 ) + action->embedRoots.append( optWrap->embedRoots ); + + actionList.append( action ); + return action; +} + +void FsmCtx::createNfaActions( FsmAp *fsm ) +{ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + if ( st->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *st->nfaOut; n.lte(); n++ ) { + /* Move pop restore actions into poptest. Wrap to override the + * condition-like testing. */ + for ( ActionTable::Iter ati = n->restoreTable; ati.lte(); ati++ ) { + n->popTest.setAction( ati->key, ati->value ); + } + + /* Move pop actions into pop test. Wrap to override the + * condition-like testing. 
*/ + for ( ActionTable::Iter ati = n->popFrom; ati.lte(); ati++ ) { + + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), + ati->value, InlineItem::NfaWrapAction ) ); + Action *wrap = newNfaWrapAction( "action_wrap", il1, ati->value ); + n->popTest.setAction( ORD_COND2, wrap ); + } + + /* Move condition evaluation into pop test. Wrap with condition + * execution. */ + if ( n->popCondSpace != 0 ) { + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), + n->popCondSpace, n->popCondKeys, + InlineItem::NfaWrapConds ) ); + Action *wrap = newNfaWrapAction( "cond_wrap", il1, 0 ); + n->popTest.setAction( ORD_COND, wrap ); + } + + /* Move pop actions into pop test. Wrap to override the + * condition-like testing. */ + for ( ActionTable::Iter ati = n->popAction; ati.lte(); ati++ ) { + + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), + ati->value, InlineItem::NfaWrapAction ) ); + Action *wrap = newNfaWrapAction( "action_wrap", il1, ati->value ); + n->popTest.setAction( ati->key, wrap ); + } + } + } + } +} + +void FsmCtx::prepareReduction( FsmAp *sectionGraph ) +{ + /* Decide if an error state is necessary. + * 1. There is an error transition + * 2. There is a gap in the transitions + * 3. The longest match operator requires it. */ + if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() ) + sectionGraph->errState = sectionGraph->addState(); + + /* State numbers need to be assigned such that all final states have a + * larger state id number than all non-final states. This enables the + * first_final mechanism to function correctly. We also want states to be + * ordered in a predictable fashion. So we first apply a depth-first + * search, then do a stable sort by final state status, then assign + * numbers. 
/* Copy host-language text to `out`, escaping the sequences that have special
 * meaning in the translated output:
 *
 *   "}@", "}$", "}="  ->  "@}@" followed by the second character
 *   "@"               ->  "@@"
 *
 * Everything else is copied unchanged. A lone '=' or '$' is deliberately not
 * escaped: doing so fixes some escaping issues but leads to other problems
 * (reproducible by passing "={}" through ragel with --colm-backend).
 * Scanning works on the C string, so it stops at the first NUL byte. */
void translatedHostData( ostream &out, const std::string &data )
{
	const char *text = data.c_str();

	while ( *text != 0 ) {
		const char cur = text[0];
		/* Safe to read: cur is not the terminator, so at worst this is. */
		const char next = text[1];

		const bool closesBlock =
				cur == '}' && ( next == '@' || next == '$' || next == '=' );

		if ( closesBlock ) {
			out << "@}@" << next;
			text += 2;
			continue;
		}

		if ( cur == '@' )
			out << "@@";
		else
			out << cur;
		text += 1;
	}
}
*/ +std::ostream &FsmGbl::info() +{ + return std::cout; +} + +ostream &operator<<( ostream &out, const InputLoc &loc ) +{ + assert( loc.fileName != 0 ); + switch ( errorFormat ) { + case ErrorFormatMSVC: + out << loc.fileName << "(" << loc.line; + if ( loc.col ) + out << "," << loc.col; + out << ")"; + break; + + default: + out << loc.fileName << ":" << loc.line; + if ( loc.col ) + out << ":" << loc.col; + break; + } + return out; +} + diff --git a/libfsm/inputdata.cc b/libfsm/inputdata.cc new file mode 100644 index 00000000..66ec4afb --- /dev/null +++ b/libfsm/inputdata.cc @@ -0,0 +1,1150 @@ +/* + * Copyright 2008-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ragel.h" +#include "common.h" +#include "inputdata.h" +#include "parsedata.h" +#include "load.h" +#include "rlscan.h" +#include "reducer.h" +#include "version.h" +#include "pcheck.h" +#include <colm/colm.h> + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <iostream> +#include <iomanip> +#include <fstream> +#include <unistd.h> +#include <sstream> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#if defined(HAVE_SYS_WAIT_H) +#include <sys/wait.h> +#endif + +#ifdef _WIN32 +#include <windows.h> +#include <psapi.h> +#include <time.h> +#include <io.h> +#include <process.h> + +#if _MSC_VER +#define S_IRUSR _S_IREAD +#define S_IWUSR _S_IWRITE +#endif +#endif + +using std::istream; +using std::ifstream; +using std::ofstream; +using std::stringstream; +using std::ostream; +using std::endl; +using std::ios; + +InputData::~InputData() +{ + inputItems.empty(); + parseDataList.empty(); + sectionList.empty(); + + for ( Vector<const char**>::Iter fns = streamFileNames; fns.lte(); fns++ ) { + const char **ptr = *fns; + while ( *ptr != 0 ) { + ::free( (void*)*ptr ); + ptr += 1; + } + free( (void*) *fns ); + } + + if ( outputFileName != 0 ) + delete[] outputFileName; + + if ( histogramFn != 0 ) + ::free( (void*)histogramFn ); + + if ( histogram != 0 ) + delete[] histogram; + + for ( ArgsVector::Iter bl = breadthLabels; bl.lte(); bl++ ) + free( (void*) *bl ); +} + +void InputData::makeDefaultFileName() +{ + if ( outputFileName == 0 ) + outputFileName = (hostLang->defaultOutFn)( inputFileName ); +} + +bool InputData::isBreadthLabel( const string &label ) +{ + for ( ArgsVector::Iter bl = breadthLabels; bl.lte(); bl++ ) { + if ( label == *bl ) + return true; + } + return false; +} + +void InputData::createOutputStream() +{ + /* Make sure we are not writing to the same file as the input file. 
*/ + if ( outputFileName != 0 ) { + if ( strcmp( inputFileName, outputFileName ) == 0 ) { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endl; + } + + /* Create the filter on the output and open it. */ + outFilter = new output_filter( outputFileName ); + + /* Open the output stream, attaching it to the filter. */ + outStream = new ostream( outFilter ); + } + else { + /* Writing out to std out. */ + outStream = &std::cout; + } +} + +void InputData::openOutput() +{ + if ( outFilter != 0 ) { + outFilter->open( outputFileName, ios::out|ios::trunc ); + if ( !outFilter->is_open() ) { + error() << "error opening " << outputFileName << " for writing" << endl; + abortCompile( 1 ); + } + } +} + +void InputData::prepareSingleMachine() +{ + ParseData *pd = 0; + GraphDictEl *gdEl = 0; + + /* Locate a machine spec to generate dot output for. We can only emit. + * Dot takes one graph at a time. */ + if ( machineSpec != 0 ) { + /* Machine specified. */ + ParseDataDictEl *pdEl = parseDataDict.find( machineSpec ); + if ( pdEl == 0 ) + error() << "could not locate machine specified with -S and/or -M" << endp; + pd = pdEl->value; + } + else { + /* No machine spec given, generate the first one. */ + if ( parseDataList.length() == 0 ) + error() << "no machine specification to generate graphviz output" << endp; + + pd = parseDataList.head; + } + + if ( machineName != 0 ) { + gdEl = pd->graphDict.find( machineName ); + if ( gdEl == 0 ) + error() << "machine definition/instantiation not found" << endp; + } + else { + /* We are using the whole machine spec. Need to make sure there + * are instances in the spec. 
*/ + if ( pd->instanceList.length() == 0 ) + error() << "no machine instantiations to generate graphviz output" << endp; + } + + pd->prepareMachineGen( gdEl, hostLang ); + dotGenPd = pd; +} + +void InputData::prepareAllMachines() +{ + for ( ParseDataDict::Iter pdel = parseDataDict; pdel.lte(); pdel++ ) { + ParseData *pd = pdel->value; + if ( pd->instanceList.length() > 0 ) { + pd->prepareMachineGen( 0, hostLang ); + + pd->makeExports(); + } + + } +} + +void InputData::generateReduced() +{ + for ( ParseDataDict::Iter pdel = parseDataDict; pdel.lte(); pdel++ ) { + ParseData *pd = pdel->value; + if ( pd->instanceList.length() > 0 ) + pd->generateReduced( inputFileName, codeStyle, *outStream, hostLang ); + } +} + +void InputData::verifyWriteHasData( InputItem *ii ) +{ + if ( ii->type == InputItem::Write ) { + if ( ii->pd->cgd == 0 ) + error( ii->loc ) << ii->pd->sectionName << ": no machine instantiations to write" << endl; + } +} + +void InputData::verifyWritesHaveData() +{ + for ( InputItemList::Iter ii = inputItems; ii.lte(); ii++ ) + verifyWriteHasData( ii ); +} + +void InputData::writeOutput( InputItem *ii ) +{ + /* If it is the first input item then check if we need to write the BOM. 
*/ + if ( ii->prev == 0 && utf8BomPresent ) + *outStream << (uchar)0xEF << (uchar)0xBB << (uchar) 0xBF; + + switch ( ii->type ) { + case InputItem::Write: { + CodeGenData *cgd = ii->pd->cgd; + cgd->writeStatement( ii->loc, ii->writeArgs.size(), + ii->writeArgs, generateDot, hostLang ); + break; + } + case InputItem::HostData: { + switch ( hostLang->backend ) { + case Direct: + if ( ii->loc.fileName != 0 ) { + if ( ii->prev != 0 ) + *outStream << "\n"; + (*hostLang->genLineDirective)( *outStream, !noLineDirectives, ii->loc.line, ii->loc.fileName ); + } + + *outStream << ii->data.str(); + break; + case Translated: + openHostBlock( '@', this, *outStream, inputFileName, ii->loc.line ); + translatedHostData( *outStream, ii->data.str() ); + *outStream << "}@"; + break; + } + break; + } + case InputItem::EndSection: { + break; + } + } +} + +void InputData::closeOutput() +{ + /* If writing to a file, delete the ostream, causing it to flush. + * Standard out is flushed automatically. */ + if ( outputFileName != 0 ) { + delete outStream; + delete outFilter; + } +} + +void InputData::processDot() +{ + /* Compiles the DOT machines. */ + prepareSingleMachine(); + + if ( errorCount > 0 ) + abortCompile( 1 ); + + createOutputStream(); + + if ( errorCount > 0 ) + abortCompile( 1 ); + + /* + * From this point on we should not be reporting any errors. + */ + + openOutput(); + writeDot( *outStream ); + closeOutput(); +} + +bool InputData::checkLastRef( InputItem *ii ) +{ + if ( generateDot ) + return true; + + if ( errorCount > 0 ) + return false; + + /* + * 1. Go forward to next last reference. + * 2. Fully process that machine, mark as processed. + * 3. Move forward through input items until no longer + */ + if ( ii->section != 0 && ii->section->lastReference == ii ) { + /* Fully Process. 
*/ + ParseData *pd = ii->pd; + + if ( pd->instanceList.length() > 0 ) { +#ifdef WITH_RAGEL_KELBT + if ( ii->parser != 0 ) + ii->parser->terminateParser(); +#endif + + FsmRes res = pd->prepareMachineGen( 0, hostLang ); + + /* Compute exports from the export definitions. */ + pd->makeExports(); + + if ( !res.success() ) + return false; + + if ( errorCount > 0 ) + return false; + + pd->generateReduced( inputFileName, codeStyle, *outStream, hostLang ); + + if ( errorCount > 0 ) + return false; + } + + /* Mark all input items referencing the machine as processed. */ + InputItem *toMark = lastFlush; + while ( true ) { + toMark->processed = true; + + if ( toMark == ii ) + break; + + toMark = toMark->next; + } + + /* Move forward, flushing input items until we get to an unprocessed + * input item. */ + while ( lastFlush != 0 && lastFlush->processed ) { + verifyWriteHasData( lastFlush ); + + if ( errorCount > 0 ) + return false; + + /* Flush out. */ + writeOutput( lastFlush ); + + lastFlush = lastFlush->next; + } + } + return true; +} + +void InputData::makeFirstInputItem() +{ + /* Make the first input item. */ + InputItem *firstInputItem = new InputItem; + firstInputItem->type = InputItem::HostData; + firstInputItem->loc.fileName = inputFileName; + firstInputItem->loc.line = 1; + firstInputItem->loc.col = 1; + inputItems.append( firstInputItem ); +} + +/* Send eof to all parsers. */ +void InputData::terminateAllParsers( ) +{ +#ifdef WITH_RAGEL_KELBT + for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ ) + pdel->value->terminateParser(); +#endif +} + +void InputData::flushRemaining() +{ + InputItem *item = inputItems.head; + + while ( item != 0 ) { + checkLastRef( item ); + item = item->next; + } + + /* Flush remaining items. */ + while ( lastFlush != 0 ) { + /* Flush out. 
*/ + writeOutput( lastFlush ); + + lastFlush = lastFlush->next; + } +} + +void InputData::makeTranslateOutputFileName() +{ + origOutputFileName = outputFileName; + outputFileName = fileNameFromStem( outputFileName, ".ri" ); + genOutputFileName = outputFileName; +} + +#ifdef WITH_RAGEL_KELBT +void InputData::parseKelbt() +{ + /* + * Ragel Parser from ragel 6. + */ + ifstream *inFileStream; + istream *inStream; + + /* Open the input file for reading. */ + assert( inputFileName != 0 ); + inFileStream = new ifstream( inputFileName ); + if ( ! inFileStream->is_open() ) + error() << "could not open " << inputFileName << " for reading" << endp; + inStream = inFileStream; + + makeFirstInputItem(); + + Scanner scanner( this, inputFileName, *inStream, 0, 0, 0, false ); + + scanner.sectionPass = true; + scanner.do_scan(); + + inStream->clear(); + inStream->seekg( 0, std::ios::beg ); + curItem = inputItems.head; + lastFlush = inputItems.head; + + scanner.sectionPass = false; + scanner.do_scan(); + + /* Finished, final check for errors.. */ + if ( errorCount > 0 ) + abortCompile( 1 ); + + /* Bail on above error. */ + if ( errorCount > 0 ) + abortCompile( 1 ); + + delete inFileStream; +} + +void InputData::processKelbt() +{ + /* With the kelbt version we implement two parse passes. The first is used + * to identify the last time that any given machine is referenced by a + * ragel section. In the second pass we parse, compile, and emit as far + * forward as possible when we encounter the last reference to a machine. + * */ + + if ( generateDot ) { + parseKelbt(); + terminateAllParsers(); + processDot(); + } + else { + createOutputStream(); + openOutput(); + parseKelbt(); + flushRemaining(); + closeOutput(); + } + + assert( errorCount == 0 ); +} +#endif + +bool InputData::parseReduce() +{ + /* + * Colm-based reduction parser introduced in ragel 7. + */ + + TopLevel *topLevel = new TopLevel( frontendSections, this, hostLang, + minimizeLevel, minimizeOpt ); + + /* Check input file. 
File is actually opened by colm code. We don't + * need to perform the check if in libragel since it comes in via a + * string. */ + if ( input == 0 ) { + ifstream *inFile = new ifstream( inputFileName ); + if ( ! inFile->is_open() ) + error() << "could not open " << inputFileName << " for reading" << endp; + delete inFile; + } + + if ( errorCount ) + return false; + + makeFirstInputItem(); + + curItem = inputItems.head; + lastFlush = inputItems.head; + + + topLevel->reduceFile( "rlparse", inputFileName ); + + if ( errorCount ) + return false; + + bool success = topLevel->success; + + delete topLevel; + return success; +} + +bool InputData::processReduce() +{ + if ( generateDot ) { + parseReduce(); + processDot(); + return true; + } + else { + createOutputStream(); + openOutput(); + + bool success = parseReduce(); + if ( success ) + flushRemaining(); + + closeOutput(); + + if ( !success && outputFileName != 0 ) + unlink( outputFileName ); + + return success; + } +} + +bool InputData::process() +{ + switch ( frontend ) { + case KelbtBased: { +#ifdef WITH_RAGEL_KELBT + processKelbt(); +#endif + return true; + } + case ReduceBased: { + return processReduce(); + } + } + return false; +} + +/* Print a summary of the options. 
*/ +void InputData::usage() +{ + info() << +"usage: ragel [options] file\n" +"general:\n" +" -h, -H, -?, --help Print this usage and exit\n" +" -v, --version Print version information and exit\n" +" -o <file> Write output to <file>\n" +" -s Print some statistics and compilation info to stderr\n" +" -d Do not remove duplicates from action lists\n" +" -I <dir> Add <dir> to the list of directories to search\n" +" for included an imported files\n" +" --rlhc Show the rlhc command used to compile\n" +" --save-temps Do not delete intermediate file during compilation\n" +" --no-intermediate Disable call to rlhc, leave behind intermediate\n" +"error reporting format:\n" +" --error-format=gnu file:line:column: message (default)\n" +" --error-format=msvc file(line,column): message\n" +"fsm minimization:\n" +" -n Do not perform minimization\n" +" -m Minimize at the end of the compilation\n" +" -l Minimize after most operations (default)\n" +" -e Minimize after every operation\n" +"visualization:\n" +" -V Generate a dot file for Graphviz\n" +" -p Display printable characters on labels\n" +" -S <spec> FSM specification to output (for graphviz output)\n" +" -M <machine> Machine definition/instantiation to output (for\n" +" graphviz output)\n" +"host language:\n" +" -C C, C++, Obj-C or Obj-C++ (default)\n" +" All code styles supported.\n" +" --asm --gas-x86-64-sys-v\n" +" GNU AS, x86_64, System V ABI.\n" +" Generated in a code style equivalent to -G2\n" +" -D D All code styles supported\n" +" -Z Go All code styles supported\n" +" -A C# -T0 -T1 -F0 -F1 -G0 -G1\n" +" -J Java -T0 -T1 -F0 -F1\n" +" -R Ruby -T0 -T1 -F0 -F1\n" +" -O OCaml -T0 -T1 -F0 -F1\n" +" -U Rust -T0 -T1 -F0 -F1\n" +" -Y Julia -T0 -T1 -F0 -F1\n" +" -K Crack -T0 -T1 -F0 -F1\n" +" -P JavaScript -T0 -T1 -F0 -F1\n" +"line directives:\n" +" -L Inhibit writing of #line directives\n" +"code style:\n" +" -T0 Binary search (default)\n" +" -T1 Binary search with expanded actions \n" +" -F0 Flat table\n" +" -F1 Flat table 
with expanded actions\n" +" -G0 Switch-driven\n" +" -G1 Switch-driven with expanded actions\n" +" -G2 Goto-driven with expanded actions\n" +"large machines:\n" +" --integral-tables Use integers for table data (default)\n" +" --string-tables Encode table data into strings for faster host lang\n" +" compilation\n" +"analysis:\n" +" --prior-interaction Search for condition-based general repetitions\n" +" that will not function properly due to state mod\n" +" overlap and must be NFA reps. \n" +" --conds-depth=D Search for high-cost conditions inside a prefix\n" +" of the machine (depth D from start state).\n" +" --state-limit=L Report fail if number of states exceeds this\n" +" during compilation.\n" +" --breadth-check=E1,E2,.. Report breadth cost of named entry points and\n" +" the start state.\n" +" --input-histogram=FN Input char histogram for breadth check. If\n" +" unspecified a flat histogram is used.\n" +"testing:\n" +" --kelbt-frontend Compile using original ragel + kelbt frontend\n" +" Requires ragel be built with ragel + kelbt support\n" +" --colm-frontend Compile using a colm-based recursive descent\n" +" frontend\n" +" --reduce-frontend Compile using a colm-based reducer (default)\n" +" --var-backend Use the variable-based backend even if the host lang\n" +" supports goto-based\n" +" --supported-host-langs Show supported host languages by command line arg\n" +" --supported-frontends Show supported frontends\n" +" --supported-backends Show supported backends\n" +" --force-libragel Cause mainline to behave like libragel\n" + ; + + abortCompile( 0 ); +} + +/* Print version information and exit. */ +void InputData::version() +{ + info() << "Ragel State Machine Compiler version " RAGEL_VERSION << " " RAGEL_PUBDATE << endl << + "Copyright (c) 2001-2019 by Adrian Thurston et al." 
/* Write `path` to `out` with every backslash doubled, so the result can be
 * placed inside a quoted line directive. All other characters are copied
 * unchanged. `path` must be a NUL-terminated string. */
void escapeLineDirectivePath( std::ostream &out, char *path )
{
	char *cursor = path;
	while ( *cursor != 0 ) {
		const char ch = *cursor++;
		if ( ch != '\\' )
			out << ch;
		else
			out << "\\\\";
	}
}
*/ + case 'n': + minimizeOpt = MinimizeNone; + break; + case 'm': + minimizeOpt = MinimizeEnd; + break; + case 'l': + minimizeOpt = MinimizeMostOps; + break; + case 'e': + minimizeOpt = MinimizeEveryOp; + break; + case 'a': + #ifdef TO_UPGRADE_CONDS + minimizeLevel = MinimizeApprox; + #else + error() << "minimize approx (-a) unsupported in this version" << endp; + #endif + break; + case 'b': + #ifdef TO_UPGRADE_CONDS + minimizeLevel = MinimizeStable; + #else + error() << "minimize stable (-b) unsupported in this version" << endp; + #endif + break; + case 'j': + minimizeLevel = MinimizePartition1; + break; + case 'k': + minimizeLevel = MinimizePartition2; + break; + + /* Machine spec. */ + case 'S': + if ( *pc.paramArg == 0 ) + error() << "please specify an argument to -S" << endl; + else if ( machineSpec != 0 ) + error() << "more than one -S argument was given" << endl; + else { + /* Ok, remember the path to the machine to generate. */ + machineSpec = pc.paramArg; + } + break; + + /* Machine path. */ + case 'M': + if ( *pc.paramArg == 0 ) + error() << "please specify an argument to -M" << endl; + else if ( machineName != 0 ) + error() << "more than one -M argument was given" << endl; + else { + /* Ok, remember the machine name to generate. */ + machineName = pc.paramArg; + } + break; + + case 'I': + if ( *pc.paramArg == 0 ) + error() << "please specify an argument to -I" << endl; + else { + includePaths.append( pc.paramArg ); + } + break; + + /* Version and help. 
*/ + case 'v': + version(); + break; + case 'H': case 'h': case '?': + usage(); + break; + case 's': + printStatistics = true; + break; + case '-': { + char *arg = strdup( pc.paramArg ); + char *eq = strchr( arg, '=' ); + + if ( eq != 0 ) + *eq++ = 0; + + if ( strcmp( arg, "help" ) == 0 ) + usage(); + else if ( strcmp( arg, "version" ) == 0 ) + version(); + else if ( strcmp( arg, "error-format" ) == 0 ) { + if ( eq == 0 ) + error() << "expecting '=value' for error-format" << endl; + else if ( strcmp( eq, "gnu" ) == 0 ) + errorFormat = ErrorFormatGNU; + else if ( strcmp( eq, "msvc" ) == 0 ) + errorFormat = ErrorFormatMSVC; + else + error() << "invalid value for error-format" << endl; + } + else if ( strcmp( arg, "rlhc" ) == 0 ) + rlhc = true; + else if ( strcmp( arg, "no-intermediate" ) == 0 ) + noIntermediate = true; +#ifdef WITH_RAGEL_KELBT + else if ( strcmp( arg, "kelbt-frontend" ) == 0 ) { + frontend = KelbtBased; + frontendSpecified = true; + } +#else + else if ( strcmp( arg, "kelbt-frontend" ) == 0 ) { + error() << "--kelbt-frontend specified but, " + "ragel not built with ragel+kelbt support" << endp; + } +#endif + else if ( strcmp( arg, "reduce-frontend" ) == 0 ) { + frontend = ReduceBased; + frontendSpecified = true; + } + else if ( strcmp( arg, "string-tables" ) == 0 ) + stringTables = true; + else if ( strcmp( arg, "integral-tables" ) == 0 ) + stringTables = false; + else if ( strcmp( arg, "supported-frontends" ) == 0 ) + showFrontends(); + else if ( strcmp( arg, "supported-backends" ) == 0 ) + showBackends(); + else if ( strcmp( arg, "save-temps" ) == 0 ) + saveTemps = true; + + else if ( strcmp( arg, "prior-interaction" ) == 0 ) + checkPriorInteraction = true; + else if ( strcmp( arg, "conds-depth" ) == 0 ) + condsCheckDepth = strtol( eq, 0, 10 ); + else if ( strcmp( arg, "state-limit" ) == 0 ) + stateLimit = strtol( eq, 0, 10 ); + + else if ( strcmp( arg, "breadth-check" ) == 0 ) { + char *ptr = 0; + while ( true ) { + char *label = strtok_r( eq, ",", 
&ptr ); + eq = NULL; + if ( label == NULL ) + break; + breadthLabels.append( strdup( label ) ); + } + checkBreadth = true; + } + else if ( strcmp( arg, "input-histogram" ) == 0 ) + histogramFn = strdup(eq); + else if ( strcmp( arg, "var-backend" ) == 0 ) + forceVar = true; + else if ( strcmp( arg, "no-fork" ) == 0 ) + noFork = true; + else { + error() << "--" << pc.paramArg << + " is an invalid argument" << endl; + } + free( arg ); + break; + } + + /* Passthrough args. */ + case 'T': + if ( pc.paramArg[0] == '0' ) + codeStyle = GenBinaryLoop; + else if ( pc.paramArg[0] == '1' ) + codeStyle = GenBinaryExp; + else { + error() << "-T" << pc.paramArg[0] << + " is an invalid argument" << endl; + abortCompile( 1 ); + } + break; + case 'F': + if ( pc.paramArg[0] == '0' ) + codeStyle = GenFlatLoop; + else if ( pc.paramArg[0] == '1' ) + codeStyle = GenFlatExp; + else { + error() << "-F" << pc.paramArg[0] << + " is an invalid argument" << endl; + abortCompile( 1 ); + } + break; + case 'G': + if ( pc.paramArg[0] == '0' ) + codeStyle = GenGotoLoop; + else if ( pc.paramArg[0] == '1' ) + codeStyle = GenGotoExp; + else if ( pc.paramArg[0] == '2' ) + codeStyle = GenIpGoto; + else if ( pc.paramArg[0] == 'T' && pc.paramArg[1] == '2' ) { + codeStyle = GenIpGoto; + maxTransitions = 32; + } else { + error() << "-G" << pc.paramArg[0] << + " is an invalid argument" << endl; + abortCompile( 1 ); + } + break; + case 'W': + if ( pc.paramArg[0] == '0' ) + codeStyle = GenSwitchLoop; + else if ( pc.paramArg[0] == '1' ) + codeStyle = GenSwitchExp; + else { + error() << "-G" << pc.paramArg[0] << + " is an invalid argument" << endl; + abortCompile( 1 ); + } + break; + + case 'p': + displayPrintables = true; + break; + + case 'L': + noLineDirectives = true; + break; + } + break; + + case ParamCheck::invalid: + error() << "-" << pc.parameter << " is an invalid argument" << endl; + break; + + case ParamCheck::noparam: + /* It is interpreted as an input file. 
*/ + if ( *pc.curArg == 0 ) + error() << "a zero length input file name was given" << endl; + else if ( inputFileName != 0 ) + error() << "more than one input file name was given" << endl; + else { + /* OK, Remember the filename. */ + inputFileName = pc.curArg; + } + break; + } + } +} + +void InputData::loadHistogram() +{ + const int alphsize = 256; + + /* Init a default. */ + histogram = new double[alphsize]; + ifstream h( histogramFn ); + if ( !h.is_open() ) + error() << "histogram read: failed to open file: " << histogramFn << endp; + + int i = 0; + double value; + while ( true ) { + if ( h >> value ) { + if ( i >= alphsize ) { + /* Too many items. */ + error() << "histogram read: too many histogram values," + " expecting " << alphsize << " (for char alphabet)" << endp; + } + histogram[i] = value; + i++; + } + else { + /* Read failure. */ + if ( h.eof() ) { + if ( i < alphsize ) { + error() << "histogram read: fell short of " << + alphsize << " items" << endp; + } + break; + } + else { + error() << "histogram read: error at item " << i << endp; + } + } + } +} + +void InputData::defaultHistogram() +{ + /* Flat histogram. */ + const int alphsize = 256; + histogram = new double[alphsize]; + for ( int i = 0; i < alphsize; i++ ) { + histogram[i] = 1.0 / (double)alphsize; + } +} + +void InputData::checkArgs() +{ + /* Require an input file. If we use standard in then we won't have a file + * name on which to base the output. */ + if ( inputFileName == 0 ) + error() << "no input file given" << endl; + + /* Bail on argument processing errors. */ + if ( errorCount > 0 ) + abortCompile( 1 ); + + /* Make sure we are not writing to the same file as the input file. 
*/ + if ( inputFileName != 0 && outputFileName != 0 && + strcmp( inputFileName, outputFileName ) == 0 ) + { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endp; + } + + if ( !frontendSpecified ) + frontend = ReduceBased; + + if ( checkBreadth ) { + if ( histogramFn != 0 ) + loadHistogram(); + else + defaultHistogram(); + } +} + +char *InputData::readInput( const char *inputFileName ) +{ + struct stat st; + int res = stat( inputFileName, &st ); + if ( res != 0 ) { + error() << inputFileName << ": stat failed: " << strerror(errno) << endl; + return 0; + } + + std::ifstream in( inputFileName ); + if ( !in.is_open() ) { + error() << inputFileName << ": could not open in force-libragel mode"; + return 0; + } + + char *input = new char[st.st_size+1]; + in.read( input, st.st_size ); + if ( in.gcount() != st.st_size ) { + error() << inputFileName << ": could not read in force-libragel mode"; + delete[] input; + return 0; + } + input[st.st_size] = 0; + + return input; +} + +int InputData::main( int argc, const char **argv ) +{ + int code = 0; + try { + parseArgs( argc, argv ); + checkArgs(); + if ( !generateDot ) + makeDefaultFileName(); + + if ( !process() ) + abortCompile( 1 ); + } + catch ( const AbortCompile &ac ) { + code = ac.code; + } + + return code; +} + +int InputData::runFrontend( int argc, const char **argv ) +{ + if ( !process() ) + return -1; + return 0; +} + +int InputData::runRlhc( int argc, const char **argv ) +{ + struct colm_program *prg; + int exit_status; + + prg = colm_new_program( rlhcSections ); + colm_set_debug( prg, 0 ); + colm_run_program( prg, argc, argv ); + exit_status = colm_delete_program( prg ); + return exit_status; +} + +/* Run a job (frontend or backend). If we want forks then we return the result + * via the process's exit code. otherwise it comes back on the stack. 
*/ +int InputData::runJob( const char *what, IdProcess idProcess, int argc, const char **argv ) +{ +#if defined(HAVE_SYS_WAIT_H) + if ( !noFork ) { + pid_t pid = fork(); + + if ( pid == 0 ) { + int es = (this->*idProcess)( argc, argv ); + exit( es ); + } + + int status = 0; + waitpid( pid, &status, 0 ); + if ( WIFSIGNALED(status) ) { + error() << what << " stopped by signal: " << WTERMSIG(status) << std::endl; + return -1; + } + + return WEXITSTATUS( status ); + } +#endif + return (this->*idProcess)( argc, argv ); +} + +int InputData::rlhcMain( int argc, const char **argv ) +{ + parseArgs( argc, argv ); + checkArgs(); + makeDefaultFileName(); + makeTranslateOutputFileName(); + + int es = runJob( "frontend", &InputData::runFrontend, 0, 0 ); + + if ( es != 0 ) + return es; + + /* rlhc <input> <output> */ + const char *_argv[] = { "rlhc", + genOutputFileName.c_str(), + origOutputFileName.c_str(), 0 }; + + return runJob( "rlhc", &InputData::runRlhc, 3, _argv ); +} diff --git a/libfsm/inputdata.h b/libfsm/inputdata.h new file mode 100644 index 00000000..689f9078 --- /dev/null +++ b/libfsm/inputdata.h @@ -0,0 +1,365 @@ +/* + * Copyright 2008-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _INPUT_DATA +#define _INPUT_DATA + +#include "gendata.h" +#include <iostream> +#include <sstream> +#include <vector> + +struct ParseData; +struct Parser6; +struct CondSpace; +struct CondAp; +struct ActionTable; +struct Section; +struct LangFuncs; + +void translatedHostData( ostream &out, const string &data ); + +struct InputItem +{ + InputItem() + : + section(0), + pd(0), + parser(0), + processed(false) + {} + + enum Type { + HostData, + EndSection, + Write, + }; + + Type type; + std::ostringstream data; + std::string name; + Section *section; + ParseData *pd; + Parser6 *parser; + std::vector<std::string> writeArgs; + + InputLoc loc; + bool processed; + + InputItem *prev, *next; +}; + +struct IncItem +{ + IncItem() + : + section(0) + {} + + Section *section; + InputLoc loc; + long start, end; + size_t length; + IncItem *prev, *next; +}; + + +typedef AvlMap<std::string, ParseData*, CmpString> ParseDataDict; +typedef AvlMapEl<std::string, ParseData*> ParseDataDictEl; +typedef DList<ParseData> ParseDataList; + +/* This exists for ragel-6 parsing. 
*/ +typedef AvlMap<const char*, Parser6*, CmpStr> ParserDict; +typedef AvlMapEl<const char*, Parser6*> ParserDictEl; +typedef DList<Parser6> ParserList; + +typedef DList<InputItem> InputItemList; +typedef DList<IncItem> IncItemList; +typedef Vector<const char *> ArgsVector; + +struct Section +{ + Section( std::string sectionName ) + : + sectionName(sectionName), + lastReference(0) + {} + + std::string sectionName; + + /* Pointer to the last input item to reference this parse data struct. Once + * we pass over this item we are free to clear away the parse tree. */ + InputItem *lastReference; + + Section *prev, *next; +}; + +typedef AvlMap<std::string, Section*, CmpString> SectionDict; +typedef AvlMapEl<std::string, Section*> SectionDictEl; +typedef DList<Section> SectionList; + +struct FnMachine +{ + FnMachine( const string &fileName, const string &machine ) + : fileName( fileName ), machine( machine ) {} + + string fileName; + string machine; +}; + +struct CmpFnMachine +{ + static inline int compare( const FnMachine &k1, const FnMachine &k2 ) + { + int r = strcmp( k1.fileName.c_str(), k2.fileName.c_str() ); + if ( r != 0 ) + return r; + else { + r = strcmp( k1.machine.c_str(), k2.machine.c_str() ); + if ( r != 0 ) + return r; + } + return 0; + } +}; + +struct IncludeRec + : public AvlTreeEl<IncludeRec> +{ + IncludeRec( const string &fileName, const string &machine ) + : key( fileName, machine ), data(0) {} + + ~IncludeRec() + { + if ( data != 0 ) + delete[] data; + } + + FnMachine key; + + const FnMachine &getKey() + { return key; } + + std::string foundFileName; + + char *data; + int len; + +}; + +struct InputData +: + public FsmGbl +{ + InputData( const HostLang *hostLang, + struct colm_sections *frontendSections, struct colm_sections *rlhcSections ) + : + FsmGbl(hostLang), + frontendSections(frontendSections), + rlhcSections(rlhcSections), + inputFileName(0), + outputFileName(0), + nextMachineId(0), + inStream(0), + outStream(0), + outFilter(0), + curItem(0), + 
lastFlush(0), + codeStyle(GenBinaryLoop), + dotGenPd(0), + machineSpec(0), + machineName(0), + generateDot(false), + noLineDirectives(false), + maxTransitions(LONG_MAX), + numSplitPartitions(0), + rlhc(false), + rlhcShowCmd(false), + noIntermediate(false), + frontendSpecified(false), + backendSpecified(false), + featureSpecified(false), + saveTemps(false), + condsCheckDepth(-1), + transSpanDepth(6), + stateLimit(0), + checkBreadth(0), + varBackend(false), + histogramFn(0), + histogram(0), + input(0), + forceVar(false), + noFork(false), + utf8BomPresent(false) + {} + + ~InputData(); + + void usage(); + void version(); + void showFrontends(); + void showBackends(); + + struct colm_sections *frontendSections; + struct colm_sections *rlhcSections; + std::string dirName; + + /* The name of the root section, this does not change during an include. */ + const char *inputFileName; + const char *outputFileName; + + string comm; + + int nextMachineId; + + std::string origOutputFileName; + std::string genOutputFileName; + + /* Io globals. */ + std::istream *inStream; + std::ostream *outStream; + output_filter *outFilter; + + ParseDataDict parseDataDict; + ParseDataList parseDataList; + InputItemList inputItems; + InputItem *curItem; + InputItem *lastFlush; + + /* Ragel-6 frontend. */ + ParserDict parserDict; + ParserList parserList; + + SectionDict sectionDict; + SectionList sectionList; + + ArgsVector includePaths; + + bool isBreadthLabel( const string &label ); + ArgsVector breadthLabels; + + /* Target language and output style. 
*/ + CodeStyle codeStyle; + + ParseData *dotGenPd; + + const char *machineSpec; + const char *machineName; + + bool generateDot; + + bool noLineDirectives; + + long maxTransitions; + int numSplitPartitions; + + bool rlhc; + bool rlhcShowCmd; + bool noIntermediate; + + bool frontendSpecified; + RagelFrontend frontend; + + bool backendSpecified; + + bool featureSpecified; + + bool saveTemps; + long condsCheckDepth; + long transSpanDepth; + long stateLimit; + bool checkBreadth; + + bool varBackend; + + const char *histogramFn; + double *histogram; + + const char *input; + + Vector<const char**> streamFileNames; + + bool forceVar; + bool noFork; + + /* Did the input file have a byte order mark? */ + bool utf8BomPresent; + + void verifyWriteHasData( InputItem *ii ); + void verifyWritesHaveData(); + + void makeTranslateOutputFileName(); + void flushRemaining(); + void makeFirstInputItem(); + void writeOutput(); + void makeDefaultFileName(); + void createOutputStream(); + void openOutput(); + void closeOutput(); + void generateReduced(); + void prepareSingleMachine(); + void prepareAllMachines(); + + void writeOutput( InputItem *ii ); + void writeLanguage( std::ostream &out ); + + bool checkLastRef( InputItem *ii ); + + void parseKelbt(); + void processDot(); + void processCodeEarly(); + + void writeDot( std::ostream &out ); + + void loadHistogram(); + void defaultHistogram(); + + void parseArgs( int argc, const char **argv ); + void checkArgs(); + void terminateParser( Parser6 *parser ); + void terminateAllParsers(); + + void processKelbt(); + void processColm(); + bool processReduce(); + bool process(); + bool parseReduce(); + + char *readInput( const char *inputFileName ); + + const char **makeIncludePathChecks( const char *curFileName, const char *fileName ); + std::ifstream *tryOpenInclude( const char **pathChecks, long &found ); + int main( int argc, const char **argv ); + + int runFrontend( int argc, const char **argv ); + int runRlhc( int argc, const char **argv 
); + + typedef int (InputData::*IdProcess)( int argc, const char **argv ); + + int runJob( const char *what, IdProcess idProcess, + int argc, const char **argv ); + + int rlhcMain( int argc, const char **argv ); +}; + + +#endif diff --git a/libfsm/ipgoto.cc b/libfsm/ipgoto.cc new file mode 100644 index 00000000..4b8af3d6 --- /dev/null +++ b/libfsm/ipgoto.cc @@ -0,0 +1,765 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "ipgoto.h" +#include "redfsm.h" +#include "gendata.h" +#include "bstmap.h" +#include "parsedata.h" +#include "inputdata.h" + +#include <sstream> + +using std::ostringstream; + +void IpGoto::tableDataPass() +{ + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); +} + +void IpGoto::genAnalysis() +{ + /* For directly executable machines there is no required state + * ordering. 
Choose a depth-first ordering to increase the + * potential for fall-throughs. */ + redFsm->depthFirstOrdering(); + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Choose single. */ + redFsm->moveSelectTransToSingle(); + + /* If any errors have occured in the input file then don't write anything. */ + if ( red->id->errorCount > 0 ) + return; + + redFsm->setInTrans(); + + /* Anlayze Machine will find the final action reference counts, among other + * things. We will use these in reporting the usage of fsm directives in + * action code. */ + red->analyzeMachine(); + + /* Run the analysis pass over the table data. */ + setTableState( TableArray::AnalyzePass ); + tableDataPass(); + + /* Switch the tables over to the code gen mode. */ + setTableState( TableArray::GeneratePass ); +} + +bool IpGoto::useAgainLabel() +{ + return redFsm->anyActionRets() || + redFsm->anyActionByValControl() || + redFsm->anyRegNextStmt(); +} + +void IpGoto::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + ret << "goto " << stLabel[gotoDest].reference() << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void IpGoto::GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";"; + + ret << " goto " << _again << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void IpGoto::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << targState << + "; " << TOP() << "+= 1; "; + + ret << "goto " << stLabel[callDest].reference() << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void IpGoto::NCALL( ostream &ret, int 
callDest, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << targState << + "; " << TOP() << "+= 1; " << vCS() << " = " << callDest << "; " << + CLOSE_GEN_BLOCK(); +} + +void IpGoto::CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << targState << "; " << TOP() << "+= 1;" << + vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";"; + + ret << " goto " << _again << ";"; + + ret << CLOSE_GEN_BLOCK(); +} + +void IpGoto::NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << targState << "; " << TOP() << "+= 1;" << + vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << "; " << CLOSE_GEN_BLOCK(); +} + +void IpGoto::RET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << " -= 1;" << vCS() << " = " + << STACK() << "[" << TOP() << "];"; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << "goto " << _again << ";" << CLOSE_GEN_BLOCK(); +} + +void IpGoto::NRET( ostream 
&ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << " -= 1;" << vCS() << " = " + << STACK() << "[" << TOP() << "];"; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << CLOSE_GEN_BLOCK(); +} + +void IpGoto::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << vCS() << " = " << nextDest << ";"; +} + +void IpGoto::NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << vCS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << ");"; +} + +void IpGoto::CURS( ostream &ret, bool inFinish ) +{ + ret << "(" << ps << ")"; +} + +void IpGoto::TARGS( ostream &ret, bool inFinish, int targState ) +{ + ret << targState; +} + +void IpGoto::BREAK( ostream &ret, int targState, bool csForced ) +{ + ret << OPEN_GEN_BLOCK() << P() << "+= 1; "; + if ( !csForced ) + ret << vCS() << " = " << targState << "; "; + ret << "goto " << _out << ";" << CLOSE_GEN_BLOCK(); +} + +void IpGoto::NBREAK( ostream &ret, int targState, bool csForced ) +{ + ret << OPEN_GEN_BLOCK() << P() << "+= 1; "; + if ( !csForced ) + ret << vCS() << " = " << targState << "; "; + ret << nbreak << " = 1;" << CLOSE_GEN_BLOCK(); +} + +void IpGoto::NFA_PUSH_ACTION( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->push != 0 ) + act = targ->push->actListId+1; + nfaPushActions.value( act ); +} + +void IpGoto::NFA_POP_TEST( RedNfaTarg *targ ) +{ + int act = 0; + if ( targ->popTest != 0 ) + act = targ->popTest->actListId+1; + nfaPopTrans.value( act ); +} + + +bool IpGoto::IN_TRANS_ACTIONS( RedStateAp *state ) +{ + bool anyWritten = false; + + /* Emit any transitions that have actions and that go to this state. 
*/ + for ( int it = 0; it < state->numInConds; it++ ) { + RedCondPair *trans = state->inConds[it]; + if ( trans->action != 0 ) { + /* Remember that we wrote an action so we know to write the + * line directive for going back to the output. */ + anyWritten = true; + + /* Write the label for the transition so it can be jumped to. */ + if ( ctrLabel[trans->id].isReferenced ) + out << "_ctr" << trans->id << ":\n"; + + /* If the action contains a next, then we must preload the current + * state since the action may or may not set it. */ + if ( trans->action->anyNextStmt() ) + out << " " << vCS() << " = " << trans->targ->id << ";\n"; + + if ( redFsm->anyRegNbreak() ) + out << nbreak << " = 0;\n"; + + /* Write each action in the list. */ + for ( GenActionTable::Iter item = trans->action->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( trans->targ->id, false, + trans->action->anyNextStmt() ) ); + out << "\n"; + } + + if ( redFsm->anyRegNbreak() ) { + out << + "if ( " << nbreak << " == 1 )\n" + " goto " << _out << ";\n"; + } + + + /* If the action contains a next then we need to reload, otherwise + * jump directly to the target state. */ + if ( trans->action->anyNextStmt() ) + out << "goto " << _again << ";\n"; + else + out << "goto " << stLabel[trans->targ->id].reference() << ";\n"; + } + } + + + return anyWritten; +} + +void IpGoto::GOTO_HEADER( RedStateAp *state ) +{ +} + +void IpGoto::STATE_GOTO_ERROR() +{ +} + + +/* Emit the goto to take for a given transition. */ +std::ostream &IpGoto::TRANS_GOTO( RedTransAp *trans ) +{ + if ( trans->condSpace == 0 || trans->condSpace->condSet.length() == 0 ) { + /* Existing. */ + assert( trans->numConds() == 1 ); + RedCondPair *cond = trans->outCond( 0 ); + if ( cond->action != 0 ) { + /* Go to the transition which will go to the state. */ + out << "goto " << ctrLabel[trans->p.id].reference() << ";"; + } + else { + /* Go directly to the target state. 
*/ + out << "goto " << stLabel[cond->targ->id].reference() << ";"; + } + } + else { + out << ck << " = 0;\n"; + for ( GenCondSet::Iter csi = trans->condSpace->condSet; csi.lte(); csi++ ) { + out << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = (1 << csi.pos()); + out << " )\n" << ck << " += " << condValOffset << ";\n"; + } + CondKey lower = 0; + CondKey upper = trans->condFullSize() - 1; + COND_B_SEARCH( trans, lower, upper, 0, trans->numConds() - 1 ); + + if ( trans->errCond() != 0 ) { + COND_GOTO( trans->errCond() ) << "\n"; + } + } + + return out; +} + +/* Emit the goto to take for a given transition. */ +std::ostream &IpGoto::COND_GOTO( RedCondPair *cond ) +{ + /* Existing. */ + if ( cond->action != 0 ) { + /* Go to the transition which will go to the state. */ + out << "goto " << ctrLabel[cond->id].reference() << ";"; + } + else { + /* Go directly to the target state. */ + out << "goto " << stLabel[cond->targ->id].reference() << ";"; + } + + return out; +} + +std::ostream &IpGoto::EXIT_STATES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( outLabel[st->id].isReferenced ) { + out << outLabel[st->id].define() << ": " << vCS() << " = " << + st->id << "; goto " << _out << "; \n"; + } + if ( popLabel[st->id].isReferenced ) { + out << popLabel[st->id].define() << ": " << vCS() << " = " << + st->id << "; goto " << _pop << "; \n"; + } + } + return out; +} + +std::ostream &IpGoto::AGAIN_CASES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + out << + "case " << st->id << ": goto " << stLabel[st->id].reference() << ";\n"; + } + return out; +} + +std::ostream &IpGoto::STATE_GOTO_CASES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + out << "case " << st->id << ":\n"; + out << "goto st_case_" << st->id << ";\n"; + } + return out; +} + +void IpGoto::NFA_PUSH_ST( RedStateAp *state ) +{ + std::stringstream ss; + ss << state->id; + std::string _state = ss.str(); + + if ( 
redFsm->anyNfaStates() ) { + + if ( state->nfaTargs != 0 ) { + out << + "if ( " << ARR_REF( nfaOffsets ) << "[" << _state << "] != 0 ) {\n"; + + if ( red->nfaPrePushExpr != 0 ) { + out << + new_recs << " = " << state->nfaTargs->length() << ";\n"; + } + + if ( red->nfaPrePushExpr != 0 ) { + out << OPEN_HOST_BLOCK( red->nfaPrePushExpr ); + INLINE_LIST( out, red->nfaPrePushExpr->inlineList, 0, false, false ); + out << CLOSE_HOST_BLOCK(); + } + + int alt = 0; + for ( RedNfaTargs::Iter nt = *state->nfaTargs; nt.lte(); nt++ ) { + out << + " nfa_bp[nfa_len].state = " << nt->state->id << ";\n" + " nfa_bp[nfa_len].p = " << P() << ";\n"; + + if ( nt->popTest != 0 ) { + out << + " nfa_bp[nfa_len].popTrans = " << (nt->popTest->actListId+1) << ";\n"; + } + else if ( redFsm->bAnyNfaPops ) { + out << + " nfa_bp[nfa_len].popTrans = 0;\n"; + } + + if ( nt->push != 0 ) { + for ( GenActionTable::Iter item = nt->push->key; item.lte(); item++ ) + ACTION( out, item->value, IlOpts( 0, false, false ) ); + } + + out << + " nfa_len += 1;\n"; + + alt += 1; + } + + out << + "}\n"; + } + } +} + +std::ostream &IpGoto::STATE_GOTOS() +{ + bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + IN_TRANS_ACTIONS( st ); + + if ( stLabel[st->id].isReferenced ) + out << "_st" << st->id << ":\n"; + + /* Need to do this if the transition is an eof transition, or if + * the action contains fexec. Otherwise, no need. */ + if ( eof ) { + out << + "if ( " << P() << " == " << vEOF() << " )\n"; + + if ( st->isFinal || !redFsm->anyNfaStates() ) + out << "goto " << outLabel[st->id].reference() << ";\n"; + else + out << "goto " << popLabel[st->id].reference() << ";\n"; + } + + if ( st->toStateAction != 0 ) { + /* Write every action in the list. 
*/ + for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( st->id, false, + st->toStateAction->anyNextStmt() ) ); + out << "\n"; + } + } + + if ( st == redFsm->errState ) { + out << "st_case_" << st->id << ":\n"; + + /* Break out here. */ + if ( !redFsm->anyNfaStates() ) + out << "goto " << outLabel[st->id].reference() << ";\n"; + else + out << "goto " << popLabel[st->id].reference() << ";\n"; + } + else { + + /* Advance and test buffer pos. */ + if ( st->labelNeeded ) { + out << + P() << "+= 1;\n"; + } + + /* Give the st a switch case. */ + out << "st_case_" << st->id << ":\n"; + + if ( !noEnd ) { + if ( eof ) { + out << + "if ( " << P() << " == " << PE() << " && " << P() << " != " << vEOF() << " )\n" + " goto " << outLabel[st->id].reference() << ";\n"; + } + else { + out << + "if ( " << P() << " == " << PE() << " )\n" + " goto " << outLabel[st->id].reference() << ";\n"; + } + } + + + NFA_PUSH_ST( st ); + + if ( st->fromStateAction != 0 ) { + /* Write every action in the list. */ + for ( GenActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ ) { + ACTION( out, item->value, IlOpts( st->id, false, + st->fromStateAction->anyNextStmt() ) ); + out << "\n"; + } + } + + if ( !noEnd && eof ) { + out << + "if ( " << P() << " == " << vEOF() << " ) {\n"; + + if ( st->eofTrans != 0 ) + TRANS_GOTO( st->eofTrans ); + else { + if ( st->isFinal || !redFsm->anyNfaStates() ) + out << "goto " << outLabel[st->id].reference() << ";\n"; + else + out << "goto " << popLabel[st->id].reference() << ";\n"; + } + + out << + "}\n" + "else {\n"; + } + + /* Record the prev st if necessary. */ + if ( st->anyRegCurStateRef() ) + out << ps << " = " << st->id << ";\n"; + + /* Try singles. 
*/ + if ( st->outSingle.length() > 0 ) + SINGLE_SWITCH( st ); + + /* Default case is to binary search for the ranges, if that fails then */ + if ( st->outRange.length() > 0 ) { + RANGE_B_SEARCH( st, keyOps->minKey, keyOps->maxKey, + 0, st->outRange.length() - 1 ); + } + + /* Write the default transition. */ + TRANS_GOTO( st->defTrans ) << "\n"; + + if ( !noEnd && eof ) { + out << + "}\n"; + } + } + } + return out; +} + +std::ostream &IpGoto::FINISH_CASES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + out << + "case " << st->id << ":\n"; + + TRANS_GOTO( st->eofTrans ); + } + } + + return out; +} + +void IpGoto::setLabelsNeeded( GenInlineList *inlineList ) +{ + for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case GenInlineItem::Goto: + case GenInlineItem::Call: + case GenInlineItem::Ncall: { + /* Mark the target as needing a label. */ + item->targState->labelNeeded = true; + break; + } + default: break; + } + + if ( item->children != 0 ) + setLabelsNeeded( item->children ); + } +} + +void IpGoto::setLabelsNeeded( RedCondPair *pair ) +{ + /* If there is no action with a next statement, then the label will be + * needed. */ + if ( pair->action == 0 || !pair->action->anyNextStmt() ) + pair->targ->labelNeeded = true; + + /* Need labels for states that have goto or calls in action code + * invoked on characters (ie, not from out action code). */ + if ( pair->action != 0 ) { + /* Loop the actions. */ + for ( GenActionTable::Iter act = pair->action->key; act.lte(); act++ ) { + /* Get the action and walk it's tree. */ + setLabelsNeeded( act->value->inlineList ); + } + } +} + +/* Set up labelNeeded flag for each state. */ +void IpGoto::setLabelsNeeded() +{ + /* If we use the _again label, then we generate the _again switch, which + * uses all labels. 
*/ + if ( useAgainLabel() ) { + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = true; + } + else { + /* Do not use all labels by default, init all labelNeeded vars to false. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = false; + + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + if ( trans->condSpace == 0 ) + setLabelsNeeded( &trans->p ); + } + + for ( CondApSet::Iter cond = redFsm->condSet; cond.lte(); cond++ ) + setLabelsNeeded( &cond->p ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofAction != 0 ) { + for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) + setLabelsNeeded( item->value->inlineList ); + } + } + } +} + +void IpGoto::writeData() +{ + STATE_IDS(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); +} + +void IpGoto::NFA_FROM_STATE_ACTION_EXEC() +{ +// if ( redFsm->anyFromStateActions() ) { +// /* Unimplemented feature. Don't have the from state actions array in +// * this mode. Need to add it, or to alter the NFA pop codegen to be +// * consistent with the mode. */ +// assert( false ); +// } +} + +void IpGoto::writeExec() +{ + int maxCtrId = redFsm->nextCondId > redFsm->nextTransId ? redFsm->nextCondId : redFsm->nextTransId; + + stLabel = allocateLabels( stLabel, IpLabel::St, redFsm->nextStateId ); + ctrLabel = allocateLabels( ctrLabel, IpLabel::Ctr, maxCtrId ); + outLabel = allocateLabels( outLabel, IpLabel::Out, redFsm->nextStateId ); + popLabel = allocateLabels( popLabel, IpLabel::Pop, redFsm->nextStateId ); + + /* Must set labels immediately before writing because we may depend on the + * noend write option. 
*/ + setLabelsNeeded(); + + out << "{\n"; + + DECLARE( INT(), cpc ); + DECLARE( INT(), ck ); + DECLARE( INT(), pop_test ); + DECLARE( INT(), nbreak ); + DECLARE( INT(), ps ); + DECLARE( INT(), new_recs ); + DECLARE( INT(), alt ); + + if ( _again.isReferenced ) { + out << + " goto " << _resume << ";\n" + "\n"; + + out << EMIT_LABEL( _again ); + + out << + " switch ( " << vCS() << " ) {\n"; + AGAIN_CASES() << + " }\n" + "\n"; + + } + + out << EMIT_LABEL( _resume ); + + out << "switch ( " << vCS() << " ) {\n"; + + STATE_GOTO_CASES(); + + out << "}\n"; + + STATE_GOTOS(); + + EXIT_STATES(); + + out << EMIT_LABEL( _pop ); + + if ( redFsm->anyNfaStates() ) { + out << + "if ( nfa_len == 0 )\n" + " goto " << _out << ";\n" + "\n"; + + out << + "nfa_count += 1;\n" + "nfa_len -= 1;\n" << + P() << " = nfa_bp[nfa_len].p;\n"; + + if ( redFsm->bAnyNfaPops ) { + NFA_FROM_STATE_ACTION_EXEC(); + + NFA_POP_TEST_EXEC(); + + out << + "if ( " << pop_test << " )\n" + " " << vCS() << " = nfa_bp[nfa_len].state;\n" + "else\n" + " " << vCS() << " = " << ERROR_STATE() << ";\n"; + } + else { + out << + vCS() << " = nfa_bp[nfa_len].state;\n"; + + } + + NFA_POST_POP(); + + out << "goto " << _resume << ";\n"; + } + + out << EMIT_LABEL( _out ); + + out << + "}\n"; +} diff --git a/libfsm/ipgoto.h b/libfsm/ipgoto.h new file mode 100644 index 00000000..1ec51bbf --- /dev/null +++ b/libfsm/ipgoto.h @@ -0,0 +1,129 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be 
included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IPGOTO_H +#define IPGOTO_H + +#include <iostream> +#include "goto.h" + +/* Forwards. */ +struct CodeGenData; + +/* + * class IpGoto + */ +class IpGoto + : public Goto +{ +public: + IpGoto( const CodeGenArgs &args ) + : + Goto( args, Ip ), + stLabel(0), + ctrLabel(0), + outLabel(0), + popLabel(0) + {} + + std::ostream &EXIT_STATES(); + std::ostream &TRANS_GOTO( RedTransAp *trans ); + std::ostream &COND_GOTO( RedCondPair *trans ); + std::ostream &FINISH_CASES(); + std::ostream &AGAIN_CASES(); + std::ostream &STATE_GOTOS(); + std::ostream &STATE_GOTO_CASES(); + + /* unused. 
*/ + virtual std::ostream &ACTION_SWITCH() { return out; } + virtual std::ostream &EXEC_FUNCS() { return out; } + virtual std::ostream &TO_STATE_ACTION_SWITCH() { return out; } + virtual std::ostream &FROM_STATE_ACTION_SWITCH() { return out; } + virtual std::ostream &EOF_ACTION_SWITCH() { return out; } + + /* Unused */ + virtual void FROM_STATE_ACTIONS() {} + virtual void TO_STATE_ACTIONS() {} + virtual void REG_ACTIONS() {} + virtual void EOF_ACTIONS() {} + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NCALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NEXT( ostream &ret, int nextDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void RET( ostream &ret, bool inFinish ); + void NRET( ostream &ret, bool inFinish ); + void CURS( ostream &ret, bool inFinish ); + void TARGS( ostream &ret, bool inFinish, int targState ); + void BREAK( ostream &ret, int targState, bool csForced ); + void NBREAK( ostream &ret, int targState, bool csForced ); + + virtual void genAnalysis(); + virtual void writeData(); + virtual void writeExec(); + +protected: + bool useAgainLabel(); + + /* Called from Goto::STATE_GOTOS just before writing the gotos for + * each state. */ + bool IN_TRANS_ACTIONS( RedStateAp *state ); + void GOTO_HEADER( RedStateAp *state ); + void STATE_GOTO_ERROR(); + + /* Set up labelNeeded flag for each state. 
*/ + void setLabelsNeeded( RedCondPair *pair ); + void setLabelsNeeded( GenInlineList *inlineList ); + void setLabelsNeeded(); + + void NFA_PUSH_ACTION( RedNfaTarg *targ ); + void NFA_POP_TEST( RedNfaTarg *targ ); + virtual void NFA_FROM_STATE_ACTION_EXEC(); + + void NFA_PUSH_ST( RedStateAp *state ); + + void tableDataPass(); + + IpLabel *stLabel; + IpLabel *ctrLabel; + IpLabel *outLabel; + IpLabel *popLabel; +}; + +namespace C +{ + class IpGoto + : + public ::IpGoto + { + public: + IpGoto( const CodeGenArgs &args ) + : ::IpGoto( args ) + {} + }; +} + +#endif diff --git a/libfsm/libragel.h b/libfsm/libragel.h new file mode 100644 index 00000000..ad328e86 --- /dev/null +++ b/libfsm/libragel.h @@ -0,0 +1,32 @@ +/* + * Copyright 2016-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _LIBRAGEL_H +#define _LIBRAGEL_H + +#ifdef __cplusplus +#define EXTERN_C extern "C" +#else +#define EXTERN_C +#endif + +#endif diff --git a/libfsm/load.h b/libfsm/load.h new file mode 100644 index 00000000..6ef7d57c --- /dev/null +++ b/libfsm/load.h @@ -0,0 +1,37 @@ +/* + * Copyright 2013-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _LOAD_H +#define _LOAD_H + +#include "ragel.h" + +struct LoadRagel; +struct InputData; +struct HostLang; + +LoadRagel *newLoadRagel( InputData &id, const HostLang *hostLang, + MinimizeLevel minimizeLevel, MinimizeOpt minimizeOpt ); +void loadRagel( LoadRagel *lr, const char *inputFileName ); +void deleteLoadRagel( LoadRagel * ); + +#endif diff --git a/libfsm/parsedata.h b/libfsm/parsedata.h new file mode 100644 index 00000000..d45de5a6 --- /dev/null +++ b/libfsm/parsedata.h @@ -0,0 +1,429 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _PARSEDATA_H +#define _PARSEDATA_H + +#include <iostream> +#include <limits.h> +#include <sstream> +#include <vector> +#include <set> + +#include "avlmap.h" +#include "bstmap.h" +#include "vector.h" +#include "dlist.h" +#include "fsmgraph.h" +#include "compare.h" +#include "vector.h" +#include "common.h" +#include "parsetree.h" +#include "action.h" + + +/* Forwards. */ +using std::ostream; + +struct VarDef; +struct Join; +struct Expression; +struct Term; +struct FactorWithAug; +struct FactorWithLabel; +struct FactorWithRep; +struct FactorWithNeg; +struct Factor; +struct Literal; +struct Range; +struct RegExpr; +struct ReItem; +struct ReOrBlock; +struct ReOrItem; +struct LongestMatch; +struct CodeGenData; +struct InputData; +struct InputItem; + +typedef DList<LongestMatch> LmList; + +/* This is used for tracking the include files/machine pairs. */ +struct IncludeHistoryItem +{ + IncludeHistoryItem( const char *fileName, const char *sectionName ) + : fileName(fileName), sectionName(sectionName) {} + + std::string fileName; + std::string sectionName; +}; + +typedef std::vector<IncludeHistoryItem> IncludeHistory; + +/* Graph dictionary. */ +struct GraphDictEl +: + public AvlTreeEl<GraphDictEl>, + public DListEl<GraphDictEl> +{ + GraphDictEl( std::string k ) + : key(k), value(0), isInstance(false) { } + GraphDictEl( std::string k, VarDef *value ) + : key(k), value(value), isInstance(false) { } + + ~GraphDictEl() + { + delete value; + } + + std::string getKey() { return key; } + + std::string key; + VarDef *value; + bool isInstance; + + /* Location info of graph definition. Points to variable name of assignment. */ + InputLoc loc; +}; + +typedef AvlTree<GraphDictEl, std::string, CmpString> GraphDict; +typedef DList<GraphDictEl> GraphList; + +/* Priority name dictionary. */ +typedef AvlMapEl<std::string, int> PriorDictEl; +typedef AvlMap<std::string, int, CmpString> PriorDict; + +/* Local error name dictionary. 
*/ +typedef AvlMapEl<std::string, int> LocalErrDictEl; +typedef AvlMap<std::string, int, CmpString> LocalErrDict; + +struct NameMapVal +{ + Vector<NameInst*> vals; +}; + +/* Tree of instantiated names. */ +typedef AvlMapEl<std::string, NameMapVal*> NameMapEl; +typedef AvlMap<std::string, NameMapVal*, CmpString> NameMap; +typedef Vector<NameInst*> NameVect; +typedef BstSet<NameInst*> NameSet; + +/* Node in the tree of instantiated names. */ +struct NameInst +{ + NameInst( const InputLoc &loc, NameInst *parent, std::string name, int id, bool isLabel ) : + loc(loc), parent(parent), name(name), id(id), isLabel(isLabel), + isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {} + + ~NameInst(); + + InputLoc loc; + + /* Keep parent pointers in the name tree to retrieve + * fully qualified names. */ + NameInst *parent; + + std::string name; + int id; + bool isLabel; + bool isLongestMatch; + + int numRefs; + int numUses; + + /* Names underneath us, excludes anonymous names. */ + NameMap children; + + /* All names underneath us in order of appearance. */ + NameVect childVect; + + /* Join scopes need an implicit "final" target. */ + NameInst *start, *final; + + /* During a fsm generation walk, lists the names that are referenced by + * epsilon operations in the current scope. After the link is made by the + * epsilon reference and the join operation is complete, the label can + * have its refcount decremented. Once there are no more references the + * entry point can be removed from the fsm returned. */ + NameVect referencedNames; + + /* Pointers for the name search queue. */ + NameInst *prev, *next; + + /* Check if this name inst or any name inst below is referenced. */ + bool anyRefsRec(); +}; + +typedef DList<NameInst> NameInstList; + +/* Stack frame used in walking the name tree. 
*/ +struct NameFrame +{ + NameInst *prevNameInst; + int prevNameChild; + NameInst *prevLocalScope; +}; + +struct LengthDef +{ + LengthDef( char *name ) + : name(name) {} + + char *name; + LengthDef *prev, *next; +}; + +typedef DList<LengthDef> LengthDefList; + +extern const int ORD_PUSH; +extern const int ORD_RESTORE; +extern const int ORD_COND; +extern const int ORD_COND2; +extern const int ORD_TEST; + +/* Class to collect information about the machine during the + * parse of input. */ +struct ParseData +{ + /* Create a new parse data object. This is done at the beginning of every + * fsm specification. */ + ParseData( InputData *id, std::string sectionName, + int machineId, const InputLoc &sectionLoc, const HostLang *hostLang, + MinimizeLevel minimizeLevel, MinimizeOpt minimizeOpt ); + ~ParseData(); + + /* + * Setting up the graph dict. + */ + + /* Initialize a graph dict with the basic fsms. */ + void initGraphDict(); + void createBuiltin( const char *name, BuiltinMachine builtin ); + + /* Make a name id in the current name instantiation scope if it is not + * already there. */ + NameInst *addNameInst( const InputLoc &loc, std::string data, bool isLabel ); + void makeRootNames(); + void makeNameTree( GraphDictEl *gdNode ); + void makeExportsNameTree(); + void fillNameIndex( NameInst *from ); + + /* Increments the usage count on entry names. Names that are no longer + * needed will have their entry points unset. */ + void unsetObsoleteEntries( FsmAp *graph ); + + /* Resolve name references in action code and epsilon transitions. */ + NameSet resolvePart( NameInst *refFrom, const std::string &data, bool recLabelsOnly ); + void resolveFrom( NameSet &result, NameInst *refFrom, + NameRef *nameRef, int namePos ); + NameInst *resolveStateRef( NameRef *nameRef, InputLoc &loc, Action *action ); + void resolveNameRefs( InlineList *inlineList, Action *action ); + void resolveActionNameRefs(); + + /* Set the alphabet type. If the types are not valid, returns false. 
*/ + bool setAlphType( const InputLoc &loc, const HostLang *hostLang, + const char *s1 ); + bool setAlphType( const InputLoc &loc, const HostLang *hostLang, + const char *s1, const char *s2 ); + + /* Override one of the variables ragel uses. */ + bool setVariable( const char *var, InlineList *inlineList ); + + /* Dumping the name instantiation tree. */ + void printNameInst( std::ostream &out, NameInst *nameInst, int level ); + void printNameTree( std::ostream &out ); + + void analysisResult( long code, long id, const char *scode ); + + void reportBreadthResults( BreadthResult *breadth ); + BreadthResult *checkBreadth( FsmAp *fsm ); + void reportAnalysisResult( FsmRes &res ); + + /* Make the graph from a graph dict node. Does minimization. */ + FsmRes makeInstance( GraphDictEl *gdNode ); + FsmRes makeSpecific( GraphDictEl *gdNode ); + FsmRes makeAll(); + + void makeExports(); + + FsmRes prepareMachineGen( GraphDictEl *graphDictEl, const HostLang *hostLang ); + void generateXML( ostream &out ); + void generateReduced( const char *inputFileName, CodeStyle codeStyle, + std::ostream &out, const HostLang *hostLang ); + + std::string sectionName; + FsmAp *sectionGraph; + + void initKeyOps( const HostLang *hostLang ); + + void errorStateLabels( const NameSet &resolved ); + + /* + * Data collected during the parse. + */ + + /* Dictionary of graphs. Both instances and non-instances go here. */ + GraphDict graphDict; + + /* The list of instances. */ + GraphList instanceList; + + /* Dictionary of actions. Lets actions be defined and then referenced. */ + ActionDict actionDict; + + /* Dictionary of named priorities. */ + PriorDict priorDict; + + /* Dictionary of named local errors. */ + LocalErrDict localErrDict; + + /* Various next identifiers. */ + int nextLocalErrKey, nextNameId; + + /* The default priority number key for a machine. This is active during + * the parse of the rhs of a machine assignment. 
*/ + int curDefPriorKey; + + int curDefLocalErrKey; + + /* Alphabet type. */ + HostType *alphType; + HostType *userAlphType; + bool alphTypeSet; + InputLoc alphTypeLoc; + + /* The alphabet range. */ + char *lowerNum, *upperNum; + Key lowKey, highKey; + InputLoc rangeLowLoc, rangeHighLoc; + + InputData *id; + + /* The name of the file the fsm is from, and the spec name. */ + int machineId; + InputLoc sectionLoc; + + /* Root of the name tree. One root is for the instantiated machines. The + * other root is for exported definitions. */ + NameInst *rootName; + NameInst *exportsRootName; + + /* Name tree walking. */ + NameInst *curNameInst; + int curNameChild; + + /* The place where resolved epsilon transitions go. These cannot go into + * the parse tree because a single epsilon op can resolve more than once + * to different nameInsts if the machine it's in is used more than once. */ + NameVect epsilonResolvedLinks; + int nextEpsilonResolvedLink; + + /* Root of the name tree used for doing local name searches. */ + NameInst *localNameScope; + + void setLmInRetLoc( InlineList *inlineList ); + void initLongestMatchData(); + void longestMatchInitTweaks( FsmAp *graph ); + void initNameWalk(); + void initExportsNameWalk(); + NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; } + NameFrame enterNameScope( bool isLocal, int numScopes ); + void popNameScope( const NameFrame &frame ); + void resetNameScope( const NameFrame &frame ); + + void nfaTermCheckKleeneZero(); + void nfaTermCheckMinZero(); + void nfaTermCheckPlusZero(); + void nfaTermCheckRepZero(); + void nfaTermCheckZeroReps(); + + void clear(); + + /* Counter for assigning ids to longest match items. */ + int nextLongestMatchId; + + int nextRepId; + + /* List of all longest match parse tree items. 
*/ + LmList lmList; + + Action *newLmCommonAction( const char *name, InlineList *inlineList ); + + Action *initTokStart; + int initTokStartOrd; + + Action *setTokStart; + int setTokStartOrd; + + Action *initActId; + int initActIdOrd; + + Action *setTokEnd; + int setTokEndOrd; + + LengthDefList lengthDefList; + + CodeGenData *cgd; + + struct Cut + { + Cut( std::string name, int entryId ) + : name(name), entryId(entryId) {} + + std::string name; + int entryId; + }; + + /* Track the cuts we set in the fsm graph. We perform cost analysis on the + * built fsm graph for each of these entry points. */ + Vector<Cut> cuts; + + ParseData *prev, *next; + + FsmCtx *fsmCtx; + + /* Make a list of places to look for an included file. */ + bool duplicateInclude( const char *inclFileName, const char *inclSectionName ); + + IncludeHistory includeHistory; + + std::set<std::string> actionParams; +}; + +Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd ); +Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ); +Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd ); +Key makeFsmKeyChar( char c, ParseData *pd ); +void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd ); +void makeFsmUniqueKeyArray( KeySet &result, const char *data, int len, + bool caseInsensitive, ParseData *pd ); +FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd ); +FsmAp *dotFsm( ParseData *pd ); +FsmAp *dotStarFsm( ParseData *pd ); + +Key *prepareHexString( ParseData *pd, const InputLoc &loc, + const char *data, long length, long &resLen ); +char *prepareLitString( InputData *id, const InputLoc &loc, const char *data, long length, + long &resLen, bool &caseInsensitive ); +const char *checkLitOptions( InputData *id, const InputLoc &loc, + const char *data, int length, bool &caseInsensitive ); + +#endif diff --git a/libfsm/parsetree.h b/libfsm/parsetree.h new file mode 100644 index 00000000..1d4f7e6b --- /dev/null +++ b/libfsm/parsetree.h @@ -0,0 +1,873 @@ 
+/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _PARSETREE_H +#define _PARSETREE_H + +#include "ragel.h" +#include "avlmap.h" +#include "bstmap.h" +#include "vector.h" +#include "dlist.h" +#include "fsmgraph.h" + +struct NameInst; + +/* Types of builtin machines. */ +enum BuiltinMachine +{ + BT_Any, + BT_Ascii, + BT_Extend, + BT_Alpha, + BT_Digit, + BT_Alnum, + BT_Lower, + BT_Upper, + BT_Cntrl, + BT_Graph, + BT_Print, + BT_Punct, + BT_Space, + BT_Xdigit, + BT_Lambda, + BT_Empty +}; + + +struct ParseData; + +/* Leaf type. */ +struct Literal; + +/* Tree nodes. 
*/ + +struct Term; +struct FactorWithAug; +struct FactorWithRep; +struct FactorWithNeg; +struct Factor; +struct Expression; +struct Join; +struct NfaUnion; +struct MachineDef; +struct LongestMatch; +struct LongestMatchPart; +struct LmPartList; +struct Range; +struct LengthDef; +struct colm_data; +struct colm_location; + +/* Type of augmentation. Describes locations in the machine. */ +enum AugType +{ + /* Transition actions/priorities. */ + at_start, + at_all, + at_finish, + at_leave, + + /* Global error actions. */ + at_start_gbl_error, + at_all_gbl_error, + at_final_gbl_error, + at_not_start_gbl_error, + at_not_final_gbl_error, + at_middle_gbl_error, + + /* Local error actions. */ + at_start_local_error, + at_all_local_error, + at_final_local_error, + at_not_start_local_error, + at_not_final_local_error, + at_middle_local_error, + + /* To State Action embedding. */ + at_start_to_state, + at_all_to_state, + at_final_to_state, + at_not_start_to_state, + at_not_final_to_state, + at_middle_to_state, + + /* From State Action embedding. */ + at_start_from_state, + at_all_from_state, + at_final_from_state, + at_not_start_from_state, + at_not_final_from_state, + at_middle_from_state, + + /* EOF Action embedding. */ + at_start_eof, + at_all_eof, + at_final_eof, + at_not_start_eof, + at_not_final_eof, + at_middle_eof +}; + +/* IMPORTANT: These must follow the same order as the state augs in AugType + * since we will be using this to compose AugType. */ +enum StateAugType +{ + sat_start = 0, + sat_all, + sat_final, + sat_not_start, + sat_not_final, + sat_middle +}; + +struct Action; +struct PriorDesc; +struct RegExpr; +struct ReItem; +struct ReOrBlock; +struct ReOrItem; +struct ExplicitMachine; +struct InlineItem; +struct InlineList; + +/* Reference to a named state. */ +struct NameRef : public Vector<std::string> {}; +typedef Vector<NameRef*> NameRefList; +typedef Vector<NameInst*> NameTargList; + +/* Structure for storing location of epsilon transitions. 
*/ +struct EpsilonLink +{ + EpsilonLink( const InputLoc &loc, NameRef *target ) + : loc(loc), target(target) { } + + InputLoc loc; + NameRef *target; +}; + +struct Label +{ + Label( const InputLoc &loc, std::string data ) + : loc(loc), data(data), cut(false) { } + + InputLoc loc; + std::string data; + bool cut; +}; + +/* Structure represents an action assigned to some FactorWithAug node. The + * factor with aug will keep an array of these. */ +struct ParserAction +{ + ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) + : loc(loc), type(type), localErrKey(localErrKey), action(action) { } + + InputLoc loc; + AugType type; + int localErrKey; + Action *action; +}; + +struct ConditionTest +{ + ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) : + loc(loc), type(type), action(action), sense(sense) { } + + InputLoc loc; + AugType type; + Action *action; + bool sense; +}; + +struct Token +{ + char *data; + int length; + ParserLoc loc; + + void set( const char *str, int len, colm_location *cl); + void set( colm_data *cd, colm_location *cl); + void set( const char *str, int len, const InputLoc &loc ); + void set( const char *str, int len, const ParserLoc &loc ); + +private: + void _set( const char *str, int len ); +}; + + +struct RedToken +{ + const char *data; + int length; + ParserLoc loc; + + void set( colm_data *cd, colm_location *cl); +}; + + +/* Store the value and type of a priority augmentation. */ +struct PriorityAug +{ + PriorityAug( AugType type, int priorKey, int priorValue ) : + type(type), priorKey(priorKey), priorValue(priorValue) { } + + AugType type; + int priorKey; + int priorValue; +}; + +/* + * A Variable Definition + */ +struct VarDef +{ + VarDef( std::string name, MachineDef *machineDef ) + : name(name), machineDef(machineDef), isExport(false) { } + + ~VarDef(); + + /* Parse tree traversal. 
*/ + FsmRes walk( ParseData *pd ); + void makeNameTree( const InputLoc &loc, ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + std::string name; + MachineDef *machineDef; + bool isExport; +}; + + +/* + * LongestMatch + * + * Wherever possible the item match will execute on the character. If not + * possible the item match will execute on a lookahead character and either + * hold the current char (if one away) or backup. + * + * How to handle the problem of backing up over a buffer break? + * + * Don't want to use pending out transitions for embedding item match because + * the role of item match action is different: it may sometimes match on the + * final transition, or may match on a lookahead character. + * + * Don't want to invent a new operator just for this. So just trail action + * after machine, this means we can only use literal actions. + * + * The item action may + * + * What states of the machine will be final. The item actions that wrap around + * on the last character will go straight to the start state. + * + * Some transitions will be lookahead transitions, they will hold the current + * character. Crossing them with regular transitions must be restricted + * because it does not make sense. The transition cannot simultaneously hold + * and consume the current character. + */ +struct LongestMatchPart +{ + LongestMatchPart( Join *join, Action *action, + const InputLoc &semiLoc, int longestMatchId ) + : + join(join), action(action), semiLoc(semiLoc), + longestMatchId(longestMatchId), inLmSelect(false) { } + + InputLoc getLoc(); + + Join *join; + Action *action; + InputLoc semiLoc; + + Action *setActId; + Action *actOnLast; + Action *actOnNext; + Action *actLagBehind; + Action *actNfaOnLast; + Action *actNfaOnNext; + Action *actNfaOnEof; + int longestMatchId; + bool inLmSelect; + LongestMatch *longestMatch; + + LongestMatchPart *prev, *next; +}; + +/* Declare a new type so that ptreetypes.h need not include dlist.h. 
*/
+struct LmPartList : DList<LongestMatchPart> {};
+
+/* A longest-match construct: its source location, the list of parts it was
+ * built from, and the traversal and construction routines that operate on
+ * it. */
+struct LongestMatch
+{
+	/* Construct with a list of joins. The name is left empty and the
+	 * select action and list links start out null, rather than
+	 * indeterminate. Initializers follow the member declaration order. */
+	LongestMatch( const InputLoc &loc, LmPartList *longestMatchList )
+	:
+		loc(loc),
+		longestMatchList(longestMatchList),
+		lmActSelect(0),
+		lmSwitchHandlesError(false),
+		nfaConstruction(false),
+		next(0), prev(0)
+	{ }
+
+	InputLoc loc;
+	LmPartList *longestMatchList;
+	std::string name;
+	Action *lmActSelect;
+	bool lmSwitchHandlesError;
+	bool nfaConstruction;
+
+	/* List links. */
+	LongestMatch *next, *prev;
+
+	/* Tree traversal. */
+	FsmRes walkClassic( ParseData *pd );
+	FsmRes walk( ParseData *pd );
+
+	FsmRes mergeNfaStates( ParseData *pd, FsmAp *fsm );
+	bool onlyOneNfa( ParseData *pd, FsmAp *fsm, StateAp *st, NfaTrans *in );
+	bool matchCanFail( ParseData *pd, FsmAp *fsm, StateAp *st );
+	void eliminateNfaActions( ParseData *pd, FsmAp *fsm );
+	void advanceNfaActions( ParseData *pd, FsmAp *fsm );
+	FsmRes buildBaseNfa( ParseData *pd );
+	FsmRes walkNfa( ParseData *pd );
+
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+	void transferScannerLeavingActions( FsmAp *graph );
+	void runLongestMatch( ParseData *pd, FsmAp *graph );
+	Action *newLmAction( ParseData *pd, const InputLoc &loc, const char *name,
+			InlineList *inlineList );
+	void makeActions( ParseData *pd );
+	void findName( ParseData *pd );
+	void restart( FsmAp *graph, TransAp *trans );
+	void restart( FsmAp *graph, CondAp *cond );
+};
+
+
+/* List of Expressions. 
*/
+typedef DList<Expression> ExprList;
+
+/* A machine definition. Exactly one of the four pointers is set by the
+ * constructor used; the others are null and type records which one it is. */
+struct MachineDef
+{
+	enum Type {
+		JoinType,
+		LongestMatchType,
+		LengthDefType,
+		NfaUnionType
+	};
+
+	MachineDef( Join *join )
+		: join(join), longestMatch(0), lengthDef(0), nfaUnion(0),
+		type(JoinType) {}
+
+	MachineDef( LongestMatch *longestMatch )
+		: join(0), longestMatch(longestMatch), lengthDef(0), nfaUnion(0),
+		type(LongestMatchType) {}
+
+	MachineDef( LengthDef *lengthDef )
+		: join(0), longestMatch(0), lengthDef(lengthDef), nfaUnion(0),
+		type(LengthDefType) {}
+
+	MachineDef( NfaUnion *nfaUnion )
+		: join(0), longestMatch(0), lengthDef(0), nfaUnion(nfaUnion),
+		type(NfaUnionType) {}
+
+	~MachineDef();
+
+	FsmRes walk( ParseData *pd );
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+
+	Join *join;
+	LongestMatch *longestMatch;
+	LengthDef *lengthDef;
+	NfaUnion *nfaUnion;
+	Type type;
+};
+
+/*
+ * Join
+ */
+struct Join
+{
+	/* Construct with the first expression. */
+	Join( Expression *expr );
+	Join( const InputLoc &loc, Expression *expr );
+
+	~Join()
+	{
+		exprList.empty();
+	}
+
+	/* Tree traversal. */
+	FsmRes walk( ParseData *pd );
+	FsmRes walkJoin( ParseData *pd );
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+
+	/* Data. */
+	InputLoc loc;
+	ExprList exprList;
+};
+
+/*
+ * Expression
+ */
+struct Expression
+{
+	enum Type {
+		OrType,
+		IntersectType,
+		SubtractType,
+		StrongSubtractType,
+		TermType,
+		BuiltinType
+	};
+
+	/* Construct with an expression on the left and a term on the right. */
+	Expression( Expression *expression, Term *term, Type type ) :
+		expression(expression), term(term),
+		type(type), prev(this), next(this) { }
+
+	/* Construct with only a term. */
+	Expression( Term *term ) :
+		expression(0), term(term),
+		type(TermType), prev(this), next(this) { }
+
+	/* Construct with a builtin type. */
+	Expression( BuiltinMachine builtin ) :
+		expression(0), term(0), builtin(builtin),
+		type(BuiltinType), prev(this), next(this) { }
+
+	~Expression();
+
+	/* Tree traversal. */
+	FsmRes walk( ParseData *pd, bool lastInSeq = true );
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+
+	/* Node data. */
+	Expression *expression;
+	Term *term;
+	BuiltinMachine builtin;
+	Type type;
+
+	/* List links; a fresh node points at itself in both directions. */
+	Expression *prev, *next;
+};
+
+typedef Vector<Term*> TermVect;
+
+/*
+ * NfaUnion
+ */
+struct NfaUnion
+{
+	/* Construct with no terms and a null rounds list. */
+	NfaUnion() : roundsList(0) { }
+	~NfaUnion();
+
+	/* Tree traversal. */
+	FsmRes walk( ParseData *pd );
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+
+	/* Node data. */
+	TermVect terms;
+	NfaRoundVect *roundsList;
+};
+
+
+/*
+ * Term
+ */
+struct Term
+{
+	enum Type {
+		ConcatType,
+		RightStartType,
+		RightFinishType,
+		LeftType,
+		FactorWithAugType
+	};
+
+	/* Every constructor initializes all pointer members, in declaration
+	 * order. Those a given form does not take are set to null. */
+
+	/* Concatenation of a term and a factor. */
+	Term( Term *term, FactorWithAug *factorWithAug ) :
+		action1(0), action2(0), action3(0),
+		term(term), factorWithAug(factorWithAug),
+		factorWithAug2(0), type(ConcatType) { }
+
+	/* A term and a factor combined according to the given type. */
+	Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
+		action1(0), action2(0), action3(0),
+		term(term), factorWithAug(factorWithAug),
+		factorWithAug2(0), type(type) { }
+
+	/* Fully specified form: three actions and two factors. */
+	Term( Action *action1, Action *action2, Action *action3,
+			Term *term, FactorWithAug *factorWithAug,
+			FactorWithAug *factorWithAug2, Type type )
+	:
+		action1(action1), action2(action2), action3(action3),
+		term(term), factorWithAug(factorWithAug),
+		factorWithAug2(factorWithAug2), type(type)
+	{ }
+
+	/* Construct with only a factor. */
+	Term( FactorWithAug *factorWithAug ) :
+		action1(0), action2(0), action3(0),
+		term(0), factorWithAug(factorWithAug),
+		factorWithAug2(0), type(FactorWithAugType) { }
+
+	~Term();
+
+	FsmRes walk( ParseData *pd, bool lastInSeq = true );
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+
+	Action *action1;
+	Action *action2;
+	Action *action3;
+
+	Term *term;
+	FactorWithAug *factorWithAug;
+	FactorWithAug *factorWithAug2;
+	Type type;
+
+	/* Priority descriptor for RightFinish type. */
+	PriorDesc priorDescs[2];
+};
+
+
+/* Third level of precedence. Augmenting nodes with actions and priorities. */
+struct FactorWithAug
+{
+	FactorWithAug( FactorWithRep *factorWithRep )
+	:
+		priorDescs(0),
+		factorWithRep(factorWithRep)
+	{}
+
+	~FactorWithAug();
+
+	/* Tree traversal. */
+	FsmRes walk( ParseData *pd );
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+
+	void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
+	void assignPriorities( FsmAp *graph, int *priorOrd );
+
+	void assignConditions( FsmAp *graph );
+
+	/* Actions and priorities assigned to the factor node. */
+	Vector<ParserAction> actions;
+	Vector<PriorityAug> priorityAugs;
+	PriorDesc *priorDescs;
+	std::vector<Label> labels;
+	Vector<EpsilonLink> epsilonLinks;
+	Vector<ConditionTest> conditions;
+
+	FactorWithRep *factorWithRep;
+};
+
+/* Fourth level of precedence. Trailing unary operators. Provide Kleene star,
+ * optional and plus. */
+struct FactorWithRep
+{
+	enum Type {
+		StarType,
+		StarStarType,
+		OptionalType,
+		PlusType,
+		ExactType,
+		MaxType,
+		MinType,
+		RangeType,
+		FactorWithNegType
+	};
+
+	/* Construct a repetition of an inner FactorWithRep. */
+	FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
+			int lowerRep, int upperRep, Type type )
+	:
+		loc(loc), repId(0), factorWithRep(factorWithRep),
+		factorWithNeg(0), lowerRep(lowerRep),
+		upperRep(upperRep), type(type)
+	{}
+
+	/* Construct a plain wrapper around a FactorWithNeg. The repetition
+	 * fields are unused in this form, so they are zeroed rather than left
+	 * indeterminate. */
+	FactorWithRep( FactorWithNeg *factorWithNeg )
+	:
+		repId(0), factorWithRep(0),
+		factorWithNeg(factorWithNeg), lowerRep(0),
+		upperRep(0), type(FactorWithNegType)
+	{}
+
+	~FactorWithRep();
+
+	/* Tree traversal. */
+	FsmRes walk( ParseData *pd );
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+
+	InputLoc loc;
+	long long repId;
+	FactorWithRep *factorWithRep;
+	FactorWithNeg *factorWithNeg;
+	int lowerRep, upperRep;
+	Type type;
+
+	/* Priority descriptor for StarStar type. */
+	PriorDesc priorDescs[4];
+};
+
+/* Fifth level of precedence. Provides Negation. 
*/
+struct FactorWithNeg
+{
+	enum Type {
+		NegateType,
+		CharNegateType,
+		FactorType
+	};
+
+	/* Construct a negation of an inner FactorWithNeg. */
+	FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
+		loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
+
+	/* Construct a plain wrapper around a factor. */
+	FactorWithNeg( Factor *factor ) :
+		factorWithNeg(0), factor(factor), type(FactorType) { }
+
+	~FactorWithNeg();
+
+	/* Tree traversal. */
+	FsmRes walk( ParseData *pd );
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+
+	InputLoc loc;
+	FactorWithNeg *factorWithNeg;
+	Factor *factor;
+	Type type;
+};
+
+/*
+ * Factor
+ */
+struct Factor
+{
+	/* Language elements a factor node can be. */
+	enum Type {
+		LiteralType,
+		RangeType,
+		OrExprType,
+		RegExprType,
+		ReferenceType,
+		ParenType,
+		LongestMatchType,
+		NfaRep,
+		NfaWrap,
+		CondStar,
+		CondPlus
+	};
+
+	enum NfaRepeatMode {
+		NfaLegacy = 1,
+		NfaGreedy,
+		NfaLazy
+	};
+
+	/* Construct with a literal fsm. */
+	Factor( Literal *literal ) :
+		literal(literal), type(LiteralType) { }
+
+	/* Construct with a range. */
+	Factor( Range *range ) :
+		range(range), type(RangeType) { }
+
+	/* Construct with the or part of a regular expression. */
+	Factor( ReItem *reItem ) :
+		reItem(reItem), type(OrExprType) { }
+
+	/* Construct with a regular expression. */
+	Factor( RegExpr *regExpr ) :
+		regExpr(regExpr), type(RegExprType) { }
+
+	/* Construct with a reference to a var def. */
+	Factor( const InputLoc &loc, VarDef *varDef ) :
+		loc(loc), varDef(varDef), type(ReferenceType) {}
+
+	/* Construct with a parenthesized join. */
+	Factor( Join *join ) :
+		join(join), type(ParenType) {}
+
+	/* Construct with a longest match operator. */
+	Factor( LongestMatch *longestMatch ) :
+		longestMatch(longestMatch), type(LongestMatchType) {}
+
+	/* Construct a factor from a repetition id, an expression and up to six
+	 * actions; the caller supplies the type. */
+	Factor( const InputLoc &loc, long long repId, Expression *expression,
+			Action *action1, Action *action2, Action *action3,
+			Action *action4, Action *action5, Action *action6, Type type )
+	:
+		loc(loc), repId(repId), expression(expression),
+		action1(action1), action2(action2), action3(action3),
+		action4(action4), action5(action5), action6(action6),
+		type(type)
+	{}
+
+	/* Cleanup. */
+	~Factor();
+
+	/* Tree traversal. */
+	FsmRes walk( ParseData *pd );
+	void makeNameTree( ParseData *pd );
+	void resolveNameRefs( ParseData *pd );
+
+	InputLoc loc;
+	Literal *literal;
+	Range *range;
+	ReItem *reItem;
+	RegExpr *regExpr;
+	VarDef *varDef;
+	Join *join;
+	LongestMatch *longestMatch;
+	int lower, upper;
+	/* Same width as the constructor parameter and FactorWithRep::repId, so
+	 * the id is not truncated where long is 32 bits. */
+	long long repId;
+	Expression *expression;
+	Action *action1;
+	Action *action2;
+	Action *action3;
+	Action *action4;
+	Action *action5;
+	Action *action6;
+	PriorDesc priorDescs[4];
+	NfaRepeatMode mode;
+
+	Type type;
+};
+
+/* A range machine. Only ever composed of two literals. */
+struct Range
+{
+	Range( Literal *lowerLit, Literal *upperLit, bool caseIndep )
+		: lowerLit(lowerLit), upperLit(upperLit), caseIndep(caseIndep) { }
+
+	~Range();
+	FsmAp *walk( ParseData *pd );
+
+	Literal *lowerLit;
+	Literal *upperLit;
+	bool caseIndep;
+};
+
+/* Some literal machine. Can be a number or literal string. */
+struct Literal
+{
+	enum LiteralType { Number, LitString, HexString };
+
+	/* Copies len characters from _data into the node's own buffer. */
+	Literal( const InputLoc &loc, bool neg, const char *_data, int len, LiteralType type )
+		: loc(loc), neg(neg), type(type)
+	{
+		data.append( _data, len );
+	}
+
+	FsmAp *walk( ParseData *pd );
+
+	InputLoc loc;
+	bool neg;
+	Vector<char> data;
+	LiteralType type;
+};
+
+/* Regular expression. */
+struct RegExpr
+{
+	enum RegExpType { RecurseItem, Empty };
+
+	/* Constructors. The empty form nulls both child pointers. */
+	RegExpr() :
+		regExpr(0), item(0),
+		type(Empty), caseInsensitive(false) { }
+	RegExpr(RegExpr *regExpr, ReItem *item) :
+		regExpr(regExpr), item(item),
+		type(RecurseItem), caseInsensitive(false) { }
+
+	~RegExpr();
+	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
+
+	RegExpr *regExpr;
+	ReItem *item;
+	RegExpType type;
+	bool caseInsensitive;
+};
+
+/* An item in a regular expression. */
+struct ReItem
+{
+	enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
+
+	/* Data item: copies len characters from _data. No or-block. */
+	ReItem( const InputLoc &loc, const char *_data, int len )
+	:
+		loc(loc), orBlock(0), star(false), type(Data)
+	{
+		data.append( _data, len );
+	}
+
+	/* Item identified by type alone. No or-block. */
+	ReItem( const InputLoc &loc, ReItemType type )
+		: loc(loc), orBlock(0), star(false), type(type) { }
+
+	/* Item carrying an or-block. */
+	ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
+		: loc(loc), orBlock(orBlock), star(false), type(type) { }
+
+	~ReItem();
+	FsmRes walk( ParseData *pd, RegExpr *rootRegex );
+
+	InputLoc loc;
+	Vector<char> data;
+	ReOrBlock *orBlock;
+	bool star;
+	ReItemType type;
+};
+
+/* An or block item. */
+struct ReOrBlock
+{
+	enum ReOrBlockType { RecurseItem, Empty };
+
+	/* Constructors. The empty form nulls both child pointers. */
+	ReOrBlock()
+		: orBlock(0), item(0), type(Empty) { }
+	ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
+		: orBlock(orBlock), item(item), type(RecurseItem) { }
+
+	~ReOrBlock();
+	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
+
+	ReOrBlock *orBlock;
+	ReOrItem *item;
+	ReOrBlockType type;
+};
+
+/* An item in an or block. 
*/ +struct ReOrItem +{ + enum ReOrItemType { Data, Range }; + + ReOrItem( const InputLoc &loc, const char *_data, int len ) + : + loc(loc), type(Data) + { + data.append( _data, len ); + } + + ReOrItem( const InputLoc &loc, char lower, char upper ) + : loc(loc), lower(lower), upper(upper), type(Range) { } + + FsmAp *walk( ParseData *pd, RegExpr *rootRegex ); + + InputLoc loc; + Vector<char> data; + char lower; + char upper; + ReOrItemType type; +}; + + +#endif diff --git a/libfsm/pcheck.h b/libfsm/pcheck.h new file mode 100644 index 00000000..adc011b3 --- /dev/null +++ b/libfsm/pcheck.h @@ -0,0 +1,49 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _PCHECK_H +#define _PCHECK_H + +class ParamCheck +{ +public: + ParamCheck( const char *paramSpec, int argc, const char **argv); + + bool check(); + + const char *paramArg; /* The argument to the parameter. 
*/ + char parameter; /* The parameter matched. */ + enum { match, invalid, noparam } state; + + const char *argOffset; /* If we are reading params inside an + * arg this points to the offset. */ + + const char *curArg; /* Pointer to the current arg. */ + int iCurArg; /* Index to the current arg. */ + +private: + const char *paramSpec; /* Parameter spec supplied by the coder. */ + int argc; /* Arguement data from the command line. */ + const char **argv; +}; + +#endif diff --git a/libfsm/ragel-config.cmake.in b/libfsm/ragel-config.cmake.in new file mode 100644 index 00000000..8de5d2cb --- /dev/null +++ b/libfsm/ragel-config.cmake.in @@ -0,0 +1,3 @@ +# @_PACKAGE_NAME@-config.cmake Generated from ragel-config.cmake.in by cmake + +include("${CMAKE_CURRENT_LIST_DIR}/@_PACKAGE_NAME@-targets.cmake") diff --git a/libfsm/ragel.h b/libfsm/ragel.h new file mode 100644 index 00000000..c3fd6f22 --- /dev/null +++ b/libfsm/ragel.h @@ -0,0 +1,108 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _RAGEL_H +#define _RAGEL_H + +#include <stdio.h> +#include <iostream> +#include <fstream> +#include <string> +#include "vector.h" +#include "config.h" +#include "common.h" + +#define PROGNAME "ragel" + +#define MAIN_MACHINE "main" + +/* Target output style. */ +enum CodeStyle +{ + GenBinaryLoop, + GenBinaryExp, + GenFlatLoop, + GenFlatExp, + GenGotoLoop, + GenGotoExp, + GenSwitchLoop, + GenSwitchExp, + GenIpGoto +}; + +/* To what degree are machine minimized. */ +enum MinimizeLevel { + #ifdef TO_UPGRADE_CONDS + MinimizeApprox, + #endif + #ifdef TO_UPGRADE_CONDS + MinimizeStable, + #endif + MinimizePartition1, + MinimizePartition2 +}; + +enum MinimizeOpt { + MinimizeNone, + MinimizeEnd, + MinimizeMostOps, + MinimizeEveryOp +}; + +/* Target implementation */ +enum RubyImplEnum +{ + MRI, + Rubinius +}; + +/* Error reporting format. 
*/ +enum ErrorFormat { + ErrorFormatGNU, + ErrorFormatMSVC, +}; + +extern ErrorFormat errorFormat; + + +struct colm_location; + +InputLoc makeInputLoc( const char *fileName, int line = 0, int col = 0 ); +InputLoc makeInputLoc( const struct colm_location *loc ); +std::ostream &operator<<( std::ostream &out, const InputLoc &loc ); + +void xmlEscapeHost( std::ostream &out, const char *data, long len ); + + +using std::endl; + +extern const char mainMachine[]; + +struct AbortCompile +{ + AbortCompile( int code ) + : code(code) {} + + int code; +}; + +#endif diff --git a/libfsm/redfsm.cc b/libfsm/redfsm.cc new file mode 100644 index 00000000..1b83e5b5 --- /dev/null +++ b/libfsm/redfsm.cc @@ -0,0 +1,1192 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "redfsm.h" +#include "avlmap.h" +#include "mergesort.h" +#include "fsmgraph.h" +#include <iostream> +#include <sstream> +#include <ctime> + +using std::ostringstream; + +GenInlineItem::~GenInlineItem() +{ + if ( children != 0 ) { + children->empty(); + delete children; + } +} + +string GenAction::nameOrLoc() +{ + if ( name.empty() ) { + ostringstream ret; + ret << loc.line << ":" << loc.col; + return ret.str(); + } + else { + return name; + } +} + +RedFsmAp::RedFsmAp( FsmCtx *fsmCtx, int machineId ) +: + keyOps(fsmCtx->keyOps), + fsmCtx(fsmCtx), + machineId(machineId), + forcedErrorState(false), + nextActionId(0), + nextTransId(0), + nextCondId(0), + startState(0), + errState(0), + errTrans(0), + errCond(0), + firstFinState(0), + numFinStates(0), + bAnyToStateActions(false), + bAnyFromStateActions(false), + bAnyRegActions(false), + bAnyEofActions(false), + bAnyEofTrans(false), + bAnyEofActivity(false), + bAnyActionGotos(false), + bAnyActionCalls(false), + bAnyActionNcalls(false), + bAnyActionRets(false), + bAnyActionNrets(false), + bAnyActionByValControl(false), + bAnyRegActionRets(false), + bAnyRegActionByValControl(false), + bAnyRegNextStmt(false), + bAnyRegCurStateRef(false), + bAnyRegBreak(false), + bAnyRegNbreak(false), + bUsingAct(false), + bAnyNfaStates(false), + bAnyNfaPushPops(false), + bAnyNfaPushes(false), + bAnyNfaPops(false), + bAnyTransCondRefs(false), + bAnyNfaCondRefs(false), + nextClass(0), + classMap(0) +{ +} + +RedFsmAp::~RedFsmAp() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + delete[] st->transList; + if ( st->nfaTargs != 0 ) + delete st->nfaTargs; + if ( st->inConds != 0 ) + delete[] st->inConds; + if ( st->inCondTests != 0 ) + delete[] st->inCondTests; + } + + delete[] allStates; + if ( classMap != 0 ) + delete[] classMap; + + for ( TransApSet::Iter ti = transSet; ti.lte(); ti++ ) { + if ( ti->condSpace != 0 ) + delete[] ti->v.outConds; + } + + condSet.empty(); + transSet.empty(); +} + +/* Does the 
machine have any actions. */ +bool RedFsmAp::anyActions() +{ + return actionMap.length() > 0; +} + +void RedFsmAp::depthFirstOrdering( RedStateAp *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->onStateList ) + return; + + /* Doing depth first, put state on the list. */ + state->onStateList = true; + stateList.append( state ); + + /* At this point transitions should only be in ranges. */ + assert( state->outSingle.length() == 0 ); + assert( state->defTrans == 0 ); + + /* Recurse on everything ranges. */ + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + for ( int c = 0; c < rtel->value->numConds(); c++ ) { + RedCondPair *cond = rtel->value->outCond( c ); + if ( cond->targ != 0 ) + depthFirstOrdering( cond->targ ); + } + } + + if ( state->nfaTargs ) { + for ( RedNfaTargs::Iter s = *state->nfaTargs; s.lte(); s++ ) + depthFirstOrdering( s->state ); + } +} + +/* Ordering states by transition connections. */ +void RedFsmAp::depthFirstOrdering() +{ + /* Init on state list flags. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->onStateList = false; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + /* Add back to the state list from the start state and all other entry + * points. */ + if ( startState != 0 ) + depthFirstOrdering( startState ); + for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( *en ); + if ( forcedErrorState ) + depthFirstOrdering( errState ); + + /* Make sure we put everything back on. */ + assert( stateListLen == stateList.length() ); +} + +void RedFsmAp::breadthFirstAdd( RedStateAp *state ) +{ + if ( state->onStateList ) + return; + + state->onStateList = true; + stateList.append( state ); +} + +void RedFsmAp::breadthFirstOrdering() +{ + /* Init on state list flags. 
*/ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->onStateList = false; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + if ( startState != 0 ) + breadthFirstAdd( startState ); + + int depth = 0; + int nextLevel = stateList.length(); + int pos = 0; + + /* To implement breadth-first we traverse the current list (assuming a + * start state) and add children. */ + RedStateAp *cur = stateList.head; + while ( cur != 0 ) { + /* Recurse on everything ranges. */ + for ( RedTransList::Iter rtel = cur->outRange; rtel.lte(); rtel++ ) { + for ( int c = 0; c < rtel->value->numConds(); c++ ) { + RedCondPair *cond = rtel->value->outCond( c ); + if ( cond->targ != 0 ) + breadthFirstAdd( cond->targ ); + } + } + + if ( cur->nfaTargs ) { + for ( RedNfaTargs::Iter s = *cur->nfaTargs; s.lte(); s++ ) + breadthFirstAdd( s->state ); + } + + cur = cur->next; + pos += 1; + + if ( pos == nextLevel ) { + depth += 1; + nextLevel = stateList.length(); + } + } + + for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( *en ); + if ( forcedErrorState ) + depthFirstOrdering( errState ); + + assert( stateListLen == stateList.length() ); +} + +#ifdef SCORE_ORDERING +void RedFsmAp::readScores() +{ + /* + * Reads processed transitions logged by ASM codegen when LOG_TRANS is + * enabled. Process with: + * + * cat trans-log | sort -n -k 1 -k 2 -k 3 | uniq -c | sort -r -n -k1 -r > scores + */ + FILE *sfn = fopen( "scores", "r" ); + + scores = new long*[nextStateId]; + for ( int i = 0; i < nextStateId; i++ ) { + scores[i] = new long[256]; + memset( scores[i], 0, sizeof(long) * 256 ); + } + + long score, m, state, ch; + while ( true ) { + int n = fscanf( sfn, "%ld %ld %ld %ld\n", &score, &m, &state, &ch ); + if ( n != 4 ) + break; + if ( m == machineId ) + scores[state][ch] = score; + } + fclose( sfn ); + + /* Init on state list flags. 
*/ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + RedTransList::Iter rtel = st->outRange; + int chi = 0; + while ( rtel.lte() ) { + /* 1. Bring chi up to lower end of out range. */ + while ( chi < rtel->lowKey.getVal() ) { + chi++; + } + + /* 2. While inside lower, add in score. */ + while ( chi <= rtel->highKey.getVal() ) { + rtel->score += scores[st->id][chi]; + chi++; + } + + /* 3. Next range. */ + rtel++; + } + } +} + +/* This second pass will collect any states that didn't make it in the first + * pass. Used for depth-first and breadth-first passes. */ +void RedFsmAp::scoreSecondPass( RedStateAp *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->onListRest ) + return; + + /* Doing depth first, put state on the list. */ + state->onListRest = true; + + if ( !state->onStateList ) { + state->onStateList = true; + stateList.append( state ); + } + + /* At this point transitions should only be in ranges. */ + assert( state->outSingle.length() == 0 ); + assert( state->defTrans == 0 ); + + /* Recurse on everything ranges. */ + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + for ( int c = 0; c < rtel->value->numConds(); c++ ) { + RedCondPair *cond = rtel->value->outCond( c ); + if ( cond->targ != 0 ) + scoreSecondPass( cond->targ ); + } + } + + if ( state->nfaTargs ) { + for ( RedNfaTargs::Iter s = *state->nfaTargs; s.lte(); s++ ) + scoreSecondPass( s->state ); + } +} + +void RedFsmAp::scoreOrderingDepth( RedStateAp *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->onStateList ) + return; + + /* Doing depth first, put state on the list. */ + state->onStateList = true; + stateList.append( state ); + + /* At this point transitions should only be in ranges. */ + assert( state->outSingle.length() == 0 ); + assert( state->defTrans == 0 ); + + /* Recurse on everything ranges. 
*/ + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + if ( rtel->score > 10 ) { + for ( int c = 0; c < rtel->value->numConds(); c++ ) { + RedCondPair *cond = rtel->value->outCond( c ); + if ( cond->targ != 0 ) + scoreOrderingDepth( cond->targ ); + } + } + } +} + +void RedFsmAp::scoreOrderingDepth() +{ + readScores(); + + /* Init on state list flags. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->onStateList = false; + st->onListRest = false; + } + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + scoreOrderingDepth( startState ); + + scoreSecondPass( startState ); + for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) + scoreSecondPass( *en ); + if ( forcedErrorState ) + scoreSecondPass( errState ); + + assert( stateListLen == stateList.length() ); +} + +void RedFsmAp::scoreOrderingBreadth() +{ + readScores(); + + /* Init on state list flags. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->onStateList = false; + st->onListRest = false; + } + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + if ( startState != 0 ) + breadthFirstAdd( startState ); + + int depth = 0; + int nextLevel = stateList.length(); + int pos = 0; + + /* To implement breadth-first we traverse the current list (assuming a + * start state) and add children. */ + RedStateAp *cur = stateList.head; + while ( cur != 0 ) { + /* Recurse on everything ranges. 
*/ + for ( RedTransList::Iter rtel = cur->outRange; rtel.lte(); rtel++ ) { + if ( rtel->score > 100 ) { + for ( int c = 0; c < rtel->value->numConds(); c++ ) { + RedCondPair *cond = rtel->value->outCond( c ); + if ( cond->targ != 0 ) + breadthFirstAdd( cond->targ ); + } + } + } + + cur = cur->next; + pos += 1; + + if ( pos == nextLevel ) { + depth += 1; + nextLevel = stateList.length(); + } + } + + scoreSecondPass( startState ); + for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) + scoreSecondPass( *en ); + if ( forcedErrorState ) + scoreSecondPass( errState ); + + assert( stateListLen == stateList.length() ); +} +#endif + +void RedFsmAp::randomizedOrdering() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->onStateList = false; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + srand( time( 0 ) ); + + for ( int i = nextStateId; i > 0; i-- ) { + /* Pick one from 0 ... i (how many are left). */ + int nth = rand() % i; + + /* Go forward through the list adding the nth. Need to scan because + * there are items already added in the list. */ + for ( int j = 0; j < nextStateId; j++ ) { + if ( !allStates[j].onStateList ) { + if ( nth == 0 ) { + /* Add. */ + allStates[j].onStateList = true; + stateList.append( &allStates[j] ); + break; + } + else { + nth -= 1; + } + } + } + } + assert( stateListLen == stateList.length() ); +} + +/* Assign state ids by appearance in the state list. */ +void RedFsmAp::sequentialStateIds() +{ + /* Table based machines depend on the state numbers starting at zero. */ + nextStateId = 0; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->id = nextStateId++; +} + +/* Stable sort the states by final state status. */ +void RedFsmAp::sortStatesByFinal() +{ + /* Move forward through the list and move final states onto the end. 
*/ + RedStateAp *state = 0; + RedStateAp *next = stateList.head; + RedStateAp *last = stateList.tail; + while ( state != last ) { + /* Move forward and load up the next. */ + state = next; + next = state->next; + + /* Throw to the end? */ + if ( state->isFinal ) { + stateList.detach( state ); + stateList.append( state ); + } + } +} + +/* Assign state ids by final state state status. */ +void RedFsmAp::sortStateIdsByFinal() +{ + /* Table based machines depend on this starting at zero. */ + nextStateId = 0; + + /* First pass to assign non final ids. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( ! st->isFinal ) + st->id = nextStateId++; + } + + /* Second pass to assign final ids. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->isFinal ) + st->id = nextStateId++; + } +} + +struct CmpStateById +{ + static int compare( RedStateAp *st1, RedStateAp *st2 ) + { + if ( st1->id < st2->id ) + return -1; + else if ( st1->id > st2->id ) + return 1; + else + return 0; + } +}; + +void RedFsmAp::sortByStateId() +{ + /* Make the array. */ + int pos = 0; + RedStateAp **ptrList = new RedStateAp*[stateList.length()]; + for ( RedStateList::Iter st = stateList; st.lte(); st++, pos++ ) + ptrList[pos] = st; + + MergeSort<RedStateAp*, CmpStateById> mergeSort; + mergeSort.sort( ptrList, stateList.length() ); + + stateList.abandon(); + for ( int st = 0; st < pos; st++ ) + stateList.append( ptrList[st] ); + + delete[] ptrList; +} + +/* Find the final state with the lowest id. */ +void RedFsmAp::findFirstFinState() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->isFinal && (firstFinState == 0 || st->id < firstFinState->id) ) + firstFinState = st; + } +} + +void RedFsmAp::assignActionLocs() +{ + int nextLocation = 0; + for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) { + /* Store the loc, skip over the array and a null terminator. 
*/ + act->location = nextLocation; + nextLocation += act->key.length() + 1; + } +} + +/* Check if we can extend the current range by displacing any ranges + * ahead to the singles. */ +bool RedFsmAp::canExtend( const RedTransList &list, int pos ) +{ + /* Get the transition that we want to extend. */ + RedTransAp *extendTrans = list[pos].value; + + /* Look ahead in the transition list. */ + for ( int next = pos + 1; next < list.length(); pos++, next++ ) { + /* If they are not continuous then cannot extend. */ + Key nextKey = list[next].lowKey; + keyOps->decrement( nextKey ); + if ( keyOps->ne( list[pos].highKey, nextKey ) ) + break; + + /* Check for the extenstion property. */ + if ( extendTrans == list[next].value ) + return true; + + /* If the span of the next element is more than one, then don't keep + * checking, it won't be moved to single. */ + unsigned long long nextSpan = keyOps->span( list[next].lowKey, list[next].highKey ); + if ( nextSpan > 1 ) + break; + } + return false; +} + +/* Move ranges to the singles list if it means we can extend some ranges, or if + * the spans are of length one. */ +void RedFsmAp::moveSelectTransToSingle( RedStateAp *state ) +{ + RedTransList &range = state->outRange; + RedTransList &single = state->outSingle; + for ( int rpos = 0; rpos < range.length(); ) { + /* Check if this is a range we can extend. */ + if ( canExtend( range, rpos ) ) { + /* Transfer singles over. */ + while ( range[rpos].value != range[rpos+1].value ) { + /* Transfer the range to single. */ + single.append( range[rpos+1] ); + range.remove( rpos+1 ); + } + + /* Extend. */ + range[rpos].highKey = range[rpos+1].highKey; + range.remove( rpos+1 ); + } + /* Maybe move it to the singles. */ + else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) { + single.append( range[rpos] ); + range.remove( rpos ); + } + else { + /* Keeping it in the ranges. 
*/ + rpos += 1; + } + } +} + +void RedFsmAp::moveAllTransToSingle( RedStateAp *state ) +{ + RedTransList &range = state->outRange; + RedTransList &single = state->outSingle; + for ( int rpos = 0; rpos < range.length(); rpos++ ) { + + RedTransEl el = range[rpos]; + unsigned long long span = keyOps->span( el.lowKey, el.highKey ); + + Key key = el.lowKey; + for ( unsigned long long pos = 0; pos < span; pos++ ) { + el.lowKey = el.highKey = key; + single.append( el ); + keyOps->increment( key ); + } + } + range.empty(); +} + +/* Look through ranges and choose suitable single character transitions. */ +void RedFsmAp::moveSelectTransToSingle() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Rewrite the transition list taking out the suitable single + * transtions. */ + moveSelectTransToSingle( st ); + } +} + +void RedFsmAp::moveAllTransToSingle() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Rewrite the transition list taking out the suitable single + * transtions. */ + moveAllTransToSingle( st ); + } +} + +void RedFsmAp::makeFlat() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outRange.length() == 0 ) { + st->lowKey = st->highKey = 0; + st->transList = 0; + } + else { + st->lowKey = st->outRange[0].lowKey; + st->highKey = st->outRange[st->outRange.length()-1].highKey; + unsigned long long span = keyOps->span( st->lowKey, st->highKey ); + st->transList = new RedTransAp*[ span ]; + memset( st->transList, 0, sizeof(RedTransAp*)*span ); + + for ( RedTransList::Iter trans = st->outRange; trans.lte(); trans++ ) { + unsigned long long base, trSpan; + base = keyOps->span( st->lowKey, trans->lowKey )-1; + trSpan = keyOps->span( trans->lowKey, trans->highKey ); + for ( unsigned long long pos = 0; pos < trSpan; pos++ ) + st->transList[base+pos] = trans->value; + } + + /* Fill in the gaps with the default transition. 
*/ + for ( unsigned long long pos = 0; pos < span; pos++ ) { + if ( st->transList[pos] == 0 ) + st->transList[pos] = st->defTrans; + } + } + } +} + +void RedFsmAp::characterClass( EquivList &equiv ) +{ + /* Find the global low and high keys. */ + bool anyTrans = false; + Key lowKey = keyOps->maxKey; + Key highKey = keyOps->minKey; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outRange.length() == 0 ) + continue; + + st->lowKey = st->outRange[0].lowKey; + st->highKey = st->outRange[st->outRange.length()-1].highKey; + + if ( keyOps->lt( st->lowKey, lowKey ) ) + lowKey = st->lowKey; + + if ( keyOps->gt( st->highKey, highKey ) ) + highKey = st->highKey; + + anyTrans = true; + } + + if ( ! anyTrans ) { + this->lowKey = lowKey; + this->highKey = highKey; + this->classMap = 0; + this->nextClass = 1; + return; + } + + long long next = 1; + equiv.append( new EquivClass( lowKey, highKey, next++ ) ); + + /* Start with a single equivalence class and break it up using range + * boundaries of each state. This will tell us what the equivalence class + * ranges are. These are the ranges that always go to the same state, + * across all states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outRange.length() == 0 ) + continue; + + EquivList newList; + PairKeyMap uniqPairs; + + /* What is the set of unique transitions (*for this state) */ + EquivAlloc uniqTrans; + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + if ( ! uniqTrans.find( rtel->value ) ) + uniqTrans.insert( rtel->value, next++ ); + } + + /* Merge with whole-machine equiv classes. 
*/ + typedef RangePairIter< PiList<EquivClass>, PiVector<RedTransEl> > RangePairIterPiListEquivClassPiVectorRedTransEl; + for ( RangePairIterPiListEquivClassPiVectorRedTransEl + pair( fsmCtx, equiv, st->outRange ); !pair.end(); pair++ ) + { + switch ( pair.userState ) { + + case RangePairIterPiListEquivClassPiVectorRedTransEl::RangeOverlap: { + /* Look up the char for s2. */ + EquivAllocEl *s2El = uniqTrans.find( pair.s2Tel.trans->value ); + + /* Can't use either equiv classes, find uniques. */ + PairKey pairKey( pair.s1Tel.trans->value, s2El->value ); + PairKeyMapEl *pairEl = uniqPairs.find( pairKey ); + if ( ! pairEl ) + pairEl = uniqPairs.insert( pairKey, next++ ); + + EquivClass *equivClass = new EquivClass( + pair.s1Tel.lowKey, pair.s1Tel.highKey, + pairEl->value ); + newList.append( equivClass ); + break; + } + + case RangePairIterPiListEquivClassPiVectorRedTransEl::RangeInS1: { + EquivClass *equivClass = new EquivClass( + pair.s1Tel.lowKey, pair.s1Tel.highKey, + pair.s1Tel.trans->value ); + newList.append( equivClass ); + break; + } + + case RangePairIterPiListEquivClassPiVectorRedTransEl::RangeInS2: { + /* Look up the char for s2. */ + EquivAllocEl *s2El = uniqTrans.find( pair.s2Tel.trans->value ); + + EquivClass *equivClass = new EquivClass( + pair.s2Tel.lowKey, pair.s2Tel.highKey, + s2El->value ); + newList.append( equivClass ); + break; + } + + case RangePairIterPiListEquivClassPiVectorRedTransEl::BreakS1: + case RangePairIterPiListEquivClassPiVectorRedTransEl::BreakS2: + break; + } + } + + equiv.empty(); + equiv.transfer( newList ); + } + + /* Reduce to sequential. */ + next = 0; + BstMap<long long, long long> map; + for ( EquivClass *c = equiv.head; c != 0; c = c->next ) { + BstMapEl<long long, long long> *el = map.find( c->value ); + if ( ! el ) + el = map.insert( c->value, next++ ); + c->value = el->value; + } + + /* Build the map and emit arrays from the range-based equiv classes. Will + * likely crash if there are no transitions in the FSM. 
*/ + long long maxSpan = keyOps->span( lowKey, highKey ); + long long *dest = new long long[maxSpan]; + memset( dest, 0, sizeof(long long) * maxSpan ); + + for ( EquivClass *c = equiv.head; c != 0; c = c->next ) { + long long base = keyOps->span( lowKey, c->lowKey ) - 1; + long long span = keyOps->span( c->lowKey, c->highKey ); + for ( long long s = 0; s < span; s++ ) + dest[base + s] = c->value; + } + + this->lowKey = lowKey; + this->highKey = highKey; + this->classMap = dest; + this->nextClass = next; + +} + +void RedFsmAp::makeFlatClass() +{ + EquivList equiv; + characterClass( equiv ); + + /* Expand the transitions. This uses the equivalence classes. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outRange.length() == 0 ) { + st->lowKey = st->highKey = 0; + st->low = st->high = 0; + st->transList = 0; + } + else { + st->lowKey = st->outRange[0].lowKey; + st->highKey = st->outRange[st->outRange.length()-1].highKey; + + /* Compute low and high in class space. Use a pair iter to find all + * the clases. Alleviates the need to iterate the whole input + * alphabet. 
*/ + st->low = nextClass; + st->high = -1; + for ( RangePairIter< PiList<EquivClass>, PiVector<RedTransEl> > + pair( fsmCtx, equiv, st->outRange ); !pair.end(); pair++ ) + { + if ( pair.userState == RangePairIter<PiList<EquivClass>, PiVector<RedTransEl> >::RangeOverlap || + pair.userState == RangePairIter<PiList<EquivClass>, PiVector<RedTransEl> >::RangeInS2 ) + { + long long off = keyOps->span( lowKey, pair.s2Tel.lowKey ) - 1; + if ( classMap[off] < st->low ) + st->low = classMap[off]; + if ( classMap[off] > st->high ) + st->high = classMap[off]; + } + } + + long long span = st->high - st->low + 1; + st->transList = new RedTransAp*[ span ]; + memset( st->transList, 0, sizeof(RedTransAp*)*span ); + + for ( RangePairIter< PiList<EquivClass>, PiVector<RedTransEl> > + pair( fsmCtx, equiv, st->outRange ); !pair.end(); pair++ ) + { + if ( pair.userState == RangePairIter< PiList<EquivClass>, PiVector<RedTransEl> >::RangeOverlap || + pair.userState == RangePairIter< PiList<EquivClass>, PiVector<RedTransEl> >::RangeInS2 ) + { + long long off = keyOps->span( lowKey, pair.s2Tel.lowKey ) - 1; + st->transList[ classMap[off] - st->low ] = pair.s2Tel.trans->value; + } + } + + /* Fill in the gaps with the default transition. */ + for ( long long pos = 0; pos < span; pos++ ) { + if ( st->transList[pos] == 0 ) + st->transList[pos] = st->defTrans; + } + } + } + + equiv.empty(); +} + + +/* A default transition has been picked, move it from the outRange to the + * default pointer. */ +void RedFsmAp::moveToDefault( RedTransAp *defTrans, RedStateAp *state ) +{ + /* Rewrite the outRange, omitting any ranges that use + * the picked default. */ + RedTransList outRange; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* If it does not take the default, copy it over. */ + if ( rtel->value != defTrans ) + outRange.append( *rtel ); + } + + /* Save off the range we just created into the state's range. 
*/ + state->outRange.transfer( outRange ); + + /* Store the default. */ + state->defTrans = defTrans; +} + +bool RedFsmAp::alphabetCovered( RedTransList &outRange ) +{ + /* Cannot cover without any out ranges. */ + if ( outRange.length() == 0 ) + return false; + + /* If the first range doesn't start at the the lower bound then the + * alphabet is not covered. */ + RedTransList::Iter rtel = outRange; + if ( keyOps->lt( keyOps->minKey, rtel->lowKey ) ) + return false; + + /* Check that every range is next to the previous one. */ + rtel.increment(); + for ( ; rtel.lte(); rtel++ ) { + Key highKey = rtel[-1].highKey; + keyOps->increment( highKey ); + if ( keyOps->ne( highKey, rtel->lowKey ) ) + return false; + } + + /* The last must extend to the upper bound. */ + RedTransEl *last = &outRange[outRange.length()-1]; + if ( keyOps->lt( last->highKey, keyOps->maxKey ) ) + return false; + + return true; +} + +RedTransAp *RedFsmAp::chooseDefaultSpan( RedStateAp *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) + stateTransSet.insert( rtel->value ); + + /* For each transition in the find how many alphabet characters the + * transition spans. */ + unsigned long long *span = new unsigned long long[stateTransSet.length()]; + memset( span, 0, sizeof(unsigned long long) * stateTransSet.length() ); + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* Lookup the transition in the set. */ + RedTransAp **inSet = stateTransSet.find( rtel->value ); + int pos = inSet - stateTransSet.data; + span[pos] += keyOps->span( rtel->lowKey, rtel->highKey ); + } + + /* Find the max span, choose it for making the default. 
*/ + RedTransAp *maxTrans = 0; + unsigned long long maxSpan = 0; + for ( RedTransSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { + if ( span[rtel.pos()] > maxSpan ) { + maxSpan = span[rtel.pos()]; + maxTrans = *rtel; + } + } + + delete[] span; + return maxTrans; +} + +/* Pick default transitions from ranges for the states. */ +void RedFsmAp::chooseDefaultSpan() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Only pick a default transition if the alphabet is covered. This + * avoids any transitions in the out range that go to error and avoids + * the need for an ERR state. */ + if ( alphabetCovered( st->outRange ) ) { + /* Pick a default transition by largest span. */ + RedTransAp *defTrans = chooseDefaultSpan( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } + } +} + +RedTransAp *RedFsmAp::chooseDefaultGoto( RedStateAp *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + for ( int c = 0; c < rtel->value->numConds(); c++ ) { + RedCondPair *cond = rtel->value->outCond(c); + if ( cond->targ == state->next ) + return rtel->value; + } + } + return 0; +} + +void RedFsmAp::chooseDefaultGoto() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Pick a default transition. */ + RedTransAp *defTrans = chooseDefaultGoto( st ); + if ( defTrans == 0 ) + defTrans = chooseDefaultSpan( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } +} + +RedTransAp *RedFsmAp::chooseDefaultNumRanges( RedStateAp *state ) +{ + /* Make a set of transitions from the outRange. 
*/ + RedTransSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) + stateTransSet.insert( rtel->value ); + + /* For each transition in the find how many ranges use the transition. */ + int *numRanges = new int[stateTransSet.length()]; + memset( numRanges, 0, sizeof(int) * stateTransSet.length() ); + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* Lookup the transition in the set. */ + RedTransAp **inSet = stateTransSet.find( rtel->value ); + numRanges[inSet - stateTransSet.data] += 1; + } + + /* Find the max number of ranges. */ + RedTransAp *maxTrans = 0; + int maxNumRanges = 0; + for ( RedTransSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { + if ( numRanges[rtel.pos()] > maxNumRanges ) { + maxNumRanges = numRanges[rtel.pos()]; + maxTrans = *rtel; + } + } + + delete[] numRanges; + return maxTrans; +} + +void RedFsmAp::chooseDefaultNumRanges() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Pick a default transition. */ + RedTransAp *defTrans = chooseDefaultNumRanges( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } +} + +RedCondAp *RedFsmAp::getErrorCond() +{ + return allocateCond( getErrorState(), 0 ); +} + +RedTransAp *RedFsmAp::getErrorTrans() +{ + return allocateTrans( getErrorState(), 0 ); +} + +RedStateAp *RedFsmAp::getErrorState() +{ + /* Something went wrong. An error state is needed but one was not supplied + * by the frontend. */ + assert( errState != 0 ); + return errState; +} + +/* Makes a plain transition. */ +RedTransAp *RedFsmAp::allocateTrans( RedStateAp *targ, RedAction *action ) +{ + /* Create a reduced trans and look for it in the transiton set. 
*/ + RedTransAp redTrans( 0, 0, targ, action ); + RedTransAp *inDict = transSet.find( &redTrans ); + if ( inDict == 0 ) { + inDict = new RedTransAp( nextTransId++, nextCondId++, targ, action ); + transSet.insert( inDict ); + } + return inDict; +} + +/* Makes a cond list transition. */ +RedTransAp *RedFsmAp::allocateTrans( GenCondSpace *condSpace, + RedCondEl *outConds, int numConds, RedCondAp *errCond ) +{ + /* Create a reduced trans and look for it in the transiton set. */ + RedTransAp redTrans( 0, condSpace, outConds, numConds, errCond ); + RedTransAp *inDict = transSet.find( &redTrans ); + if ( inDict == 0 ) { + inDict = new RedTransAp( nextTransId++, condSpace, outConds, numConds, errCond ); + transSet.insert( inDict ); + } + else { + /* Need to free the out cond vector. */ + delete[] outConds; + } + return inDict; +} + +RedCondAp *RedFsmAp::allocateCond( RedStateAp *targ, RedAction *action ) +{ + /* Create a reduced trans and look for it in the transiton set. */ + RedCondAp redCond( targ, action, 0 ); + RedCondAp *inDict = condSet.find( &redCond ); + if ( inDict == 0 ) { + inDict = new RedCondAp( targ, action, nextCondId++ ); + condSet.insert( inDict ); + } + return inDict; +} + +void RedFsmAp::partitionFsm( int nparts ) +{ + /* At this point the states are ordered by a depth-first traversal. We + * will allocate to partitions based on this ordering. */ + this->nParts = nparts; + int partSize = stateList.length() / nparts; + int remainder = stateList.length() % nparts; + int numInPart = partSize; + int partition = 0; + if ( remainder-- > 0 ) + numInPart += 1; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->partition = partition; + + numInPart -= 1; + if ( numInPart == 0 ) { + partition += 1; + numInPart = partSize; + if ( remainder-- > 0 ) + numInPart += 1; + } + } +} + +void RedFsmAp::setInTrans() +{ + /* First pass counts the number of transitions. 
*/ + for ( CondApSet::Iter trans = condSet; trans.lte(); trans++ ) + trans->p.targ->numInConds += 1; + + for ( TransApSet::Iter trans = transSet; trans.lte(); trans++ ) { + if ( trans->condSpace == 0 ) + trans->p.targ->numInConds += 1; + else { + /* We have a placement choice here, but associate it with the + * first. */ + RedCondPair *pair = trans->outCond( 0 ); + pair->targ->numInCondTests += 1; + } + } + + /* Allocate. Reset the counts so we can use them as the current size. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->inConds = new RedCondPair*[st->numInConds]; + st->numInConds = 0; + + st->inCondTests = new RedTransAp*[st->numInCondTests]; + st->numInCondTests = 0; + } + + /* Fill the arrays. */ + for ( CondApSet::Iter trans = condSet; trans.lte(); trans++ ) { + RedStateAp *targ = trans->p.targ; + targ->inConds[targ->numInConds++] = &trans->p; + } + + for ( TransApSet::Iter trans = transSet; trans.lte(); trans++ ) { + if ( trans->condSpace == 0 ) { + RedStateAp *targ = trans->p.targ; + targ->inConds[targ->numInConds++] = &trans->p; + } + else { + RedCondPair *pair = trans->outCond( 0 ); + RedStateAp *targ = pair->targ; + targ->inCondTests[targ->numInCondTests++] = trans; + } + } +} diff --git a/libfsm/redfsm.h b/libfsm/redfsm.h new file mode 100644 index 00000000..392b1a9c --- /dev/null +++ b/libfsm/redfsm.h @@ -0,0 +1,889 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * 
copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _REDFSM_H +#define _REDFSM_H + +#include <assert.h> +#include <string.h> +#include <string> +#include "config.h" +#include "common.h" +#include "vector.h" +#include "dlist.h" +#include "compare.h" +#include "bstmap.h" +#include "bstset.h" +#include "avlmap.h" +#include "avltree.h" +#include "avlbasic.h" +#include "mergesort.h" +#include "sbstmap.h" +#include "sbstset.h" +#include "sbsttable.h" + +#define TRANS_ERR_TRANS 0 +#define STATE_ERR_STATE 0 +#define FUNC_NO_FUNC 0 + +// #define SCORE_ORDERING 1 + +using std::string; + +struct RedStateAp; +struct GenInlineList; +struct GenAction; +struct FsmCtx; +struct GenCondSpace; +typedef BstSet<int> RedCondKeySet; + +/* + * Inline code tree + */ +struct GenInlineItem +{ + enum Type + { + Text, Goto, Call, Ncall, Next, GotoExpr, CallExpr, + NcallExpr, NextExpr, Ret, Nret, + PChar, Char, Hold, Curs, Targs, Entry, Exec, Break, Nbreak, + LmSwitch, LmExec, LmSetActId, LmSetTokEnd, LmGetTokEnd, + LmInitAct, LmInitTokStart, LmSetTokStart, NfaClear, + HostStmt, HostExpr, HostText, + GenStmt, GenExpr, LmCase, LmHold, + NfaWrapAction, NfaWrapConds + }; + + GenInlineItem( const InputLoc &loc, Type type ) : + loc(loc), targId(0), targState(0), + lmId(0), children(0), offset(0), + wrappedAction(0), type(type) { } + + ~GenInlineItem(); + + InputLoc loc; + std::string data; + int targId; + RedStateAp *targState; + int lmId; + GenInlineList *children; + int offset; + GenAction 
*wrappedAction; + GenCondSpace *condSpace; + RedCondKeySet condKeySet; + Type type; + + GenInlineItem *prev, *next; +}; + +/* Normally this would be atypedef, but that would entail including DList from + * ptreetypes, which should be just typedef forwards. */ +struct GenInlineList : public DList<GenInlineItem> { }; + +struct GenInlineExpr +{ + GenInlineExpr( const InputLoc &loc, GenInlineList *inlineList ) + : loc(loc), inlineList( inlineList ) {} + + ~GenInlineExpr() + { + if ( inlineList != 0 ) { + inlineList->empty(); + delete inlineList; + } + } + + InputLoc loc; + GenInlineList *inlineList; +}; + +/* Element in list of actions. Contains the string for the code to exectute. */ +struct GenAction +: + public DListEl<GenAction> +{ + GenAction( ) + : + inlineList(0), + actionId(0), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + numNfaPushRefs(0), + numNfaRestoreRefs(0), + numNfaPopActionRefs(0), + numNfaPopTestRefs(0) + { + } + + ~GenAction() + { + if ( inlineList != 0 ) { + inlineList->empty(); + delete inlineList; + } + } + + /* Data collected during parse. */ + InputLoc loc; + std::string name; + GenInlineList *inlineList; + int actionId; + + string nameOrLoc(); + + /* Number of references in the final machine. */ + int numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + int numNfaPushRefs; + int numNfaRestoreRefs; + int numNfaPopActionRefs; + int numNfaPopTestRefs; +}; + + +/* Forwards. */ +struct RedStateAp; +struct StateAp; + +/* Transistion GenAction Element. */ +typedef SBstMapEl< int, GenAction* > GenActionTableEl; + +/* Transition GenAction Table. 
*/ +struct GenActionTable + : public SBstMap< int, GenAction*, CmpOrd<int> > +{ + void setAction( int ordering, GenAction *action ); + void setActions( int *orderings, GenAction **actions, int nActs ); + void setActions( const GenActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). */ +struct CmpGenActionTableEl +{ + static int compare( const GenActionTableEl &action1, + const GenActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for GenActionTable. */ +typedef CmpSTable< GenActionTableEl, CmpGenActionTableEl > CmpGenActionTable; + +/* Set of states. */ +typedef BstSet<RedStateAp*> RedStateSet; +typedef BstSet<int> IntSet; + +/* Reduced action. */ +struct RedAction +: + public AvlTreeEl<RedAction> +{ + RedAction( ) + : + key(), + eofRefs(0), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + numNfaPushRefs(0), + numNfaRestoreRefs(0), + numNfaPopActionRefs(0), + numNfaPopTestRefs(0), + bAnyNextStmt(false), + bAnyCurStateRef(false), + bAnyBreakStmt(false), + bUsingAct(false) + { } + + const GenActionTable &getKey() + { return key; } + + GenActionTable key; + int actListId; + int location; + IntSet *eofRefs; + + /* Number of references in the final machine. 
*/ + int numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + int numNfaPushRefs; + int numNfaRestoreRefs; + int numNfaPopActionRefs; + int numNfaPopTestRefs; + + bool anyNextStmt() { return bAnyNextStmt; } + bool anyCurStateRef() { return bAnyCurStateRef; } + bool anyBreakStmt() { return bAnyBreakStmt; } + bool usingAct() { return bUsingAct; } + + bool bAnyNextStmt; + bool bAnyCurStateRef; + bool bAnyBreakStmt; + bool bUsingAct; +}; + +typedef AvlTree<RedAction, GenActionTable, CmpGenActionTable> GenActionTableMap; + +struct RedCondPair +{ + int id; + RedStateAp *targ; + RedAction *action; +}; + +struct RedCondAp +: + public AvlTreeEl<RedCondAp> +{ + RedCondAp( RedStateAp *targ, RedAction *action, int id ) + { + p.id = id; + p.targ = targ; + p.action = action; + } + + RedCondPair p; +}; + +struct RedCondEl +{ + CondKey key; + RedCondAp *value; +}; + +struct CmpRedCondEl +{ + static int compare( const RedCondEl &el1, const RedCondEl &el2 ) + { + if ( el1.key < el2.key ) + return -1; + else if ( el1.key > el2.key ) + return 1; + else if ( el1.value < el2.value ) + return -1; + else if ( el1.value > el2.value ) + return 1; + else + return 0; + } +}; + +typedef Vector< GenAction* > GenCondSet; + +struct GenCondSpace +{ + GenCondSpace() + : + numTransRefs(0), + numNfaRefs(0) + {} + + Key baseKey; + GenCondSet condSet; + int condSpaceId; + + long fullSize() + { return ( 1 << condSet.length() ); } + + long numTransRefs; + long numNfaRefs; + + GenCondSpace *next, *prev; +}; + +typedef DList<GenCondSpace> CondSpaceList; + +struct RedCondVect +{ + int numConds; + RedCondEl *outConds; + RedCondAp *errCond; +}; + +/* Reduced transition. 
*/
struct RedTransAp
:
	public AvlTreeEl<RedTransAp>
{
	/* Conditional form: the transition carries a vector of conditions that
	 * belong to condSpace. */
	RedTransAp( int id, GenCondSpace *condSpace,
			RedCondEl *outConds, int numConds, RedCondAp *errCond )
	:
		id(id),
		condSpace(condSpace)
	{
		v.numConds = numConds;
		v.outConds = outConds;
		v.errCond = errCond;
	}

	/* Plain form: no condition space, a single (id, target, action) pair. */
	RedTransAp( int id, int condId, RedStateAp *targ, RedAction *action )
	:
		id(id),
		condSpace(0)
	{
		p.targ = targ;
		p.action = action;
		p.id = condId;
	}

	/* Size of the condition space, or 1 for a plain transition. */
	long condFullSize()
	{
		if ( condSpace == 0 )
			return 1;
		return condSpace->fullSize();
	}

	/* Key of the off'th condition; CondKey(0) for a plain transition. */
	CondKey outCondKey( int off )
	{
		if ( condSpace == 0 )
			return CondKey(0);
		return v.outConds[off].key;
	}

	/* Pair of the off'th condition; the embedded pair for a plain
	 * transition (off is not used in that case). */
	RedCondPair *outCond( int off )
	{
		if ( condSpace == 0 )
			return &p;
		return &v.outConds[off].value->p;
	}

	/* Number of conditions; 1 for a plain transition. */
	int numConds()
	{
		if ( condSpace == 0 )
			return 1;
		return v.numConds;
	}

	/* Pair of the error condition, or null when the transition is plain or
	 * has no error condition. */
	RedCondPair *errCond()
	{
		if ( condSpace == 0 || v.errCond == 0 )
			return 0;
		return &v.errCond->p;
	}

	int id;
	GenCondSpace *condSpace;

	/* Either a pair or a vector of conds. Which one is active is decided
	 * by condSpace: null selects p, non-null selects v. */
	union
	{
		RedCondPair p;
		RedCondVect v;
	};
};

/* Compare of transitions for the final reduction of transitions. Comparison
 * is on target and the pointer to the shared action table. It is assumed that
 * when this is used the action tables have been reduced.
*/ +struct CmpRedTransAp +{ + static int compare( const RedTransAp &t1, const RedTransAp &t2 ) + { + if ( t1.condSpace < t2.condSpace ) + return -1; + else if ( t1.condSpace > t2.condSpace ) + return 1; + else { + if ( t1.condSpace == 0 ) { + if ( t1.p.targ < t2.p.targ ) + return -1; + else if ( t1.p.targ > t2.p.targ ) + return 1; + else if ( t1.p.action < t2.p.action ) + return -1; + else if ( t1.p.action > t2.p.action ) + return 1; + else + return 0; + + } + else { + if ( t1.v.numConds < t2.v.numConds ) + return -1; + else if ( t1.v.numConds > t2.v.numConds ) + return 1; + else + { + RedCondEl *i1 = t1.v.outConds, *i2 = t2.v.outConds; + long len = t1.v.numConds, cmpResult; + for ( long pos = 0; pos < len; + pos += 1, i1 += 1, i2 += 1 ) + { + cmpResult = CmpRedCondEl::compare(*i1, *i2); + if ( cmpResult != 0 ) + return cmpResult; + } + return 0; + } + } + } + } +}; + +struct CmpRedCondAp +{ + static int compare( const RedCondAp &t1, const RedCondAp &t2 ) + { + if ( t1.p.targ < t2.p.targ ) + return -1; + else if ( t1.p.targ > t2.p.targ ) + return 1; + else if ( t1.p.action < t2.p.action ) + return -1; + else if ( t1.p.action > t2.p.action ) + return 1; + else + return 0; + } +}; + +typedef AvlBasic<RedTransAp, CmpRedTransAp> TransApSet; +typedef AvlBasic<RedCondAp, CmpRedCondAp> CondApSet; + +/* Element in out range. */ +struct RedTransEl +{ + /* Constructors. */ + RedTransEl( Key lowKey, Key highKey, RedTransAp *value ) + : + lowKey(lowKey), + highKey(highKey), + value(value) +#ifdef SCORE_ORDERING + , score(0) +#endif + { } + + Key lowKey, highKey; + RedTransAp *value; +#ifdef SCORE_ORDERING + long long score; +#endif +}; + +typedef Vector<RedTransEl> RedTransList; +typedef Vector<RedStateAp*> RedStateVect; + +typedef BstMapEl<RedStateAp*, unsigned long long> RedSpanMapEl; +typedef BstMap<RedStateAp*, unsigned long long> RedSpanMap; + +/* Compare used by span map sort. Reverse sorts by the span. 
*/ +struct CmpRedSpanMapEl +{ + static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 ) + { + if ( smel1.value > smel2.value ) + return -1; + else if ( smel1.value < smel2.value ) + return 1; + else + return 0; + } +}; + +/* Sorting state-span map entries by span. */ +typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort; + +/* Set of entry ids that go into this state. */ +typedef Vector<int> EntryIdVect; +typedef Vector<char*> EntryNameVect; + +struct Condition +{ + Condition( ) + : key(0), baseKey(0) {} + + Key key; + Key baseKey; + GenCondSet condSet; + + Condition *next, *prev; +}; +typedef DList<Condition> ConditionList; + +struct GenStateCond +{ + Key lowKey; + Key highKey; + + GenCondSpace *condSpace; + + GenStateCond *prev, *next; +}; +typedef DList<GenStateCond> GenStateCondList; +typedef Vector<GenStateCond*> StateCondVect; + +struct RedNfaTarg +{ + RedNfaTarg( RedStateAp *state, RedAction *push, + RedAction *popTest, int order ) + : + id(0), + state(state), + push(push), + popTest(popTest), + order(order) + {} + + long id; + RedStateAp *state; + RedAction *push; + RedAction *popTest; + int order; +}; + +struct RedNfaTargCmp +{ + static inline long compare( const RedNfaTarg &k1, const RedNfaTarg &k2 ) + { + if ( k1.order < k2.order ) + return -1; + else if ( k1.order > k2.order ) + return 1; + return 0; + } +}; + +typedef Vector<RedNfaTarg> RedNfaTargs; + +/* Reduced state. */ +struct RedStateAp +{ + RedStateAp() + : + defTrans(0), + transList(0), + isFinal(false), + labelNeeded(false), + outNeeded(false), + onStateList(false), + onListRest(false), + toStateAction(0), + fromStateAction(0), + eofAction(0), + eofTrans(0), + id(0), + bAnyRegCurStateRef(false), + partitionBoundary(false), + inConds(0), + numInConds(0), + inCondTests(0), + numInCondTests(0), + nfaTargs(0), + outCondSpace(0) + { } + + /* Transitions out. */ + RedTransList outSingle; + RedTransList outRange; + RedTransAp *defTrans; + + /* For flat keys. 
*/ + Key lowKey, highKey; + RedTransAp **transList; + long long low, high; + + /* The list of states that transitions from this state go to. */ + RedStateVect targStates; + + bool isFinal; + bool labelNeeded; + bool outNeeded; + bool onStateList; + bool onListRest; + RedAction *toStateAction; + RedAction *fromStateAction; + RedAction *eofAction; + RedTransAp *eofTrans; + int id; + + /* Pointers for the list of states. */ + RedStateAp *prev, *next; + + bool anyRegCurStateRef() { return bAnyRegCurStateRef; } + bool bAnyRegCurStateRef; + + int partition; + bool partitionBoundary; + + RedCondPair **inConds; + int numInConds; + + RedTransAp **inCondTests; + int numInCondTests; + + RedNfaTargs *nfaTargs; + GenCondSpace *outCondSpace; + RedCondKeySet outCondKeys; +}; + +/* List of states. */ +typedef DList<RedStateAp> RedStateList; + +/* Set of reduced transitions. Comparison is by pointer. */ +typedef BstSet< RedTransAp*, CmpOrd<RedTransAp*> > RedTransSet; + +/* Next version of the fsm machine. */ +struct RedFsmAp +{ + RedFsmAp( FsmCtx *fsmCtx, int machineId ); + ~RedFsmAp(); + + KeyOps *keyOps; + FsmCtx *fsmCtx; + int machineId; + + bool forcedErrorState; + + int nextActionId; + int nextTransId; + int nextCondId; + + /* Next State Id doubles as the total number of state ids. 
*/ + int nextStateId; + + TransApSet transSet; + CondApSet condSet; + GenActionTableMap actionMap; + RedStateList stateList; + RedStateSet entryPoints; + RedStateAp *startState; + RedStateAp *errState; + RedTransAp *errTrans; + RedCondAp *errCond; + RedTransAp *errActionTrans; + RedStateAp *firstFinState; + RedStateAp *allStates; + int numFinStates; + int nParts; + + bool bAnyToStateActions; + bool bAnyFromStateActions; + bool bAnyRegActions; + bool bAnyEofActions; + bool bAnyEofTrans; + bool bAnyEofActivity; + bool bAnyActionGotos; + bool bAnyActionCalls; + bool bAnyActionNcalls; + bool bAnyActionRets; + bool bAnyActionNrets; + bool bAnyActionByValControl; + bool bAnyRegActionRets; + bool bAnyRegActionByValControl; + bool bAnyRegNextStmt; + bool bAnyRegCurStateRef; + bool bAnyRegBreak; + bool bAnyRegNbreak; + bool bUsingAct; + bool bAnyNfaStates; + bool bAnyNfaPushPops; + bool bAnyNfaPushes; + bool bAnyNfaPops; + bool bAnyTransCondRefs; + bool bAnyNfaCondRefs; + + int maxState; + int maxSingleLen; + int maxRangeLen; + int maxKeyOffset; + int maxIndexOffset; + int maxIndex; + int maxActListId; + int maxActionLoc; + int maxActArrItem; + unsigned long long maxSpan; + int maxFlatIndexOffset; + Key maxKey; + int maxCondSpaceId; + int maxCond; + + bool anyActions(); + bool anyToStateActions() { return bAnyToStateActions; } + bool anyFromStateActions() { return bAnyFromStateActions; } + bool anyRegActions() { return bAnyRegActions; } + bool anyEofActions() { return bAnyEofActions; } + bool anyEofTrans() { return bAnyEofTrans; } + bool anyEofActivity() { return bAnyEofActivity; } + bool anyActionGotos() { return bAnyActionGotos; } + bool anyActionCalls() { return bAnyActionCalls; } + bool anyActionNcalls() { return bAnyActionNcalls; } + bool anyActionRets() { return bAnyActionRets; } + bool anyActionNrets() { return bAnyActionNrets; } + bool anyActionByValControl() { return bAnyActionByValControl; } + bool anyRegActionRets() { return bAnyRegActionRets; } + bool 
anyRegActionByValControl() { return bAnyRegActionByValControl; } + bool anyRegNextStmt() { return bAnyRegNextStmt; } + bool anyRegCurStateRef() { return bAnyRegCurStateRef; } + bool anyRegBreak() { return bAnyRegBreak; } + bool usingAct() { return bUsingAct; } + bool anyRegNbreak() { return bAnyRegNbreak; } + bool anyNfaStates() { return bAnyNfaStates; } + + /* Is it possible to extend a range by bumping ranges that span only + * one character to the singles array. */ + bool canExtend( const RedTransList &list, int pos ); + + /* Pick single transitions from the ranges. */ + void moveSelectTransToSingle( RedStateAp *state ); + void moveAllTransToSingle( RedStateAp *state ); + + void moveSelectTransToSingle(); + void moveAllTransToSingle(); + + void makeFlat(); + + /* State low/high, in key space and class space. */ + Key lowKey; + Key highKey; + long long nextClass; + long long *classMap; + + /* Support structs for equivalence class computation. */ + struct EquivClass + { + EquivClass( Key lowKey, Key highKey, long long value ) + : lowKey(lowKey), highKey(highKey), value(value) {} + + Key lowKey, highKey; + long long value; + EquivClass *prev, *next; + }; + + typedef DList<EquivClass> EquivList; + typedef BstMap<RedTransAp*, int> EquivAlloc; + typedef BstMapEl<RedTransAp*, int> EquivAllocEl; + + struct PairKey + { + PairKey( long long k1, long long k2 ) + : k1(k1), k2(k2) {} + + long long k1; + long long k2; + }; + + struct PairKeyCmp + { + static inline long compare( const PairKey &k1, const PairKey &k2 ) + { + if ( k1.k1 < k2.k1 ) + return -1; + else if ( k1.k1 > k2.k1 ) + return 1; + if ( k1.k2 < k2.k2 ) + return -1; + else if ( k1.k2 > k2.k2 ) + return 1; + else + return 0; + } + }; + + typedef BstMap< PairKey, long long, PairKeyCmp > PairKeyMap; + typedef BstMapEl< PairKey, long long > PairKeyMapEl; + + void characterClass( EquivList &equiv ); + void makeFlatClass(); + + /* Move a selected transition from ranges to default. 
*/ + void moveToDefault( RedTransAp *defTrans, RedStateAp *state ); + + /* Pick a default transition by largest span. */ + RedTransAp *chooseDefaultSpan( RedStateAp *state ); + void chooseDefaultSpan(); + + /* Pick a default transition by most number of ranges. */ + RedTransAp *chooseDefaultNumRanges( RedStateAp *state ); + void chooseDefaultNumRanges(); + + /* Pick a default transition tailored towards goto driven machine. */ + RedTransAp *chooseDefaultGoto( RedStateAp *state ); + void chooseDefaultGoto(); + + /* Ordering states by transition connections. */ + void optimizeStateOrdering( RedStateAp *state ); + void optimizeStateOrdering(); + + /* Ordering states by transition connections. */ + void depthFirstOrdering( RedStateAp *state ); + void depthFirstOrdering(); + + void breadthFirstAdd( RedStateAp *state ); + void breadthFirstOrdering(); + + void randomizedOrdering(); + +#ifdef SCORE_ORDERING + long **scores; + void scoreSecondPass( RedStateAp *state ); + void scoreOrderingBreadth(); + void readScores(); + void scoreOrderingDepth( RedStateAp *state ); + void scoreOrderingDepth(); +#endif + + /* Set state ids. */ + void sequentialStateIds(); + void sortStateIdsByFinal(); + + /* Arrange states by final id. This is a stable sort. */ + void sortStatesByFinal(); + + /* Sorting states by id. */ + void sortByStateId(); + + /* Locating the first final state. This is the final state with the lowest + * id. */ + void findFirstFinState(); + + void assignActionLocs(); + + RedCondAp *getErrorCond(); + RedTransAp *getErrorTrans(); + RedStateAp *getErrorState(); + + /* Is every char in the alphabet covered? 
*/ + bool alphabetCovered( RedTransList &outRange ); + + RedTransAp *allocateTrans( RedStateAp *targ, RedAction *action ); + RedTransAp *allocateTrans( GenCondSpace *condSpace, + RedCondEl *outConds, int numConds, RedCondAp *errCond ); + + RedCondAp *allocateCond( RedStateAp *targState, RedAction *actionTable ); + + void partitionFsm( int nParts ); + + void setInTrans(); +}; + +#endif diff --git a/libfsm/reducer.cc b/libfsm/reducer.cc new file mode 100644 index 00000000..592dcfe1 --- /dev/null +++ b/libfsm/reducer.cc @@ -0,0 +1,230 @@ +/* + * Copyright 2015-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "reducer.h" + +#include <colm/colm.h> +#include <colm/tree.h> + +#include <errno.h> + +using std::endl; +using std::ifstream; + +void TopLevel::loadMachineName( string data ) +{ + /* Make/get the priority key. The name may have already been referenced + * and therefore exist. 
*/ + PriorDictEl *priorDictEl; + if ( pd->priorDict.insert( data, pd->fsmCtx->nextPriorKey, &priorDictEl ) ) + pd->fsmCtx->nextPriorKey += 1; + pd->curDefPriorKey = priorDictEl->value; + + /* Make/get the local error key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + pd->curDefLocalErrKey = localErrDictEl->value; +} + +void TopLevel::tryMachineDef( const InputLoc &loc, std::string name, + MachineDef *machineDef, bool isInstance ) +{ + GraphDictEl *newEl = pd->graphDict.insert( name ); + if ( newEl != 0 ) { + /* New element in the dict, all good. */ + newEl->value = new VarDef( name, machineDef ); + newEl->isInstance = isInstance; + newEl->loc = loc; + newEl->value->isExport = exportContext[exportContext.length()-1]; + + /* If it is an instance, put on the instance list. */ + if ( isInstance ) + pd->instanceList.append( newEl ); + } + else { + // Recover by ignoring the duplicate. + pd->id->error(loc) << "fsm \"" << name << "\" previously defined" << endl; + } +} + +long TopLevel::tryLongScan( const InputLoc &loc, const char *data ) +{ + /* Convert the priority number to a long. Check for overflow. */ + long priorityNum; + errno = 0; + + long aug = strtol( data, 0, 10 ); + if ( errno == ERANGE && aug == LONG_MAX ) { + /* Priority number too large. Recover by setting the priority to 0. */ + pd->id->error(loc) << "priority number " << data << + " overflows" << endl; + priorityNum = 0; + } + else if ( errno == ERANGE && aug == LONG_MIN ) { + /* Priority number too large in the neg. Recover by using 0. */ + pd->id->error(loc) << "priority number " << data << + " underflows" << endl; + priorityNum = 0; + } + else { + /* No overflow or underflow. */ + priorityNum = aug; + } + + return priorityNum; +} + +void TopLevel::include( const InputLoc &incLoc, bool fileSpecified, string fileName, string machine ) +{ + /* Stash the current section name and pd. 
*/ + string sectionName = pd->sectionName; + ParseData *pd0 = pd; + + const char **includeChecks = 0; + long found = 0; + + const char *inclSectionName = machine.c_str(); + + /* Default the section name to the current section name. */ + if ( inclSectionName == 0 ) + inclSectionName = sectionName.c_str(); + + /* Build the include checks. */ + if ( fileSpecified ) + includeChecks = pd->id->makeIncludePathChecks( curFileName, fileName.c_str() ); + else { + char *test = new char[strlen(curFileName)+1]; + strcpy( test, curFileName ); + + includeChecks = new const char*[2]; + + includeChecks[0] = test; + includeChecks[1] = 0; + } + + /* Try to find the file. */ + ifstream *inFile = pd->id->tryOpenInclude( includeChecks, found ); + if ( inFile == 0 ) { + id->error(incLoc) << "include: failed to locate file" << endl; + const char **tried = includeChecks; + while ( *tried != 0 ) + id->error(incLoc) << "include: attempted: \"" << *tried++ << '\"' << endl; + + return; + } + + delete inFile; + +// /* Don't include anything that's already been included. */ +// if ( !pd->duplicateInclude( includeChecks[found], inclSectionName ) ) { +// pd->includeHistory.push_back( IncludeHistoryItem( +// includeChecks[found], inclSectionName ) ); +// +// /* Either we are not in the lib, or a file was specified, use the +// * file-based include pass. */ +// includePass.reduceFile( includeChecks[found], id->hostLang ); +// } + + const char *targetMachine0 = targetMachine; + const char *searchMachine0 = searchMachine; + + includeDepth += 1; + pd = 0; + + targetMachine = sectionName.c_str(); + searchMachine = machine.c_str(); + + // reduceFile( includeChecks[found] ); + +// if ( includePass.incItems.length() == 0 ) { +// pd->id->error(incLoc) << "could not find machine " << machine << +// " in " << fileName << endp; +// } +// else { +// /* Load the data into include el. Save in the dict. 
*/ +// loadIncludeData( el, includePass, includeChecks[found] ); +// id->includeDict.insert( el ); +// includePass.incItems.empty(); +// } + + pd = pd0; + includeDepth -= 1; + + targetMachine = targetMachine0; + searchMachine = searchMachine0; +} + +void TopLevel::import( const InputLoc &loc, std::string name, Literal *literal ) +{ + MachineDef *machineDef = new MachineDef( + new Join( + new Expression( + new Term( + new FactorWithAug( + new FactorWithRep( + new FactorWithNeg( new Factor( literal ) ) + ) + ) + ) + ) + ) + ); + + /* Generic creation of machine for instantiation and assignment. */ + tryMachineDef( loc, name, machineDef, false ); + machineDef->join->loc = loc; +} + +void TopLevel::reduceFile( const char *cmd, const char *inputFileName ) +{ + const int baseN = 2; + const char **argv = new const char*[baseN + id->includePaths.length() + 1]; + argv[0] = cmd; + argv[1] = inputFileName; + for ( int i = 0; i < id->includePaths.length(); i++ ) + argv[baseN + i] = id->includePaths.data[i]; + argv[baseN + id->includePaths.length()] = 0; + + const char *prevCurFileName = curFileName; + curFileName = inputFileName; + + colm_program *program = colm_new_program( frontendSections ); + colm_set_debug( program, 0 ); + colm_set_reduce_clean( program, 0 ); + colm_set_reduce_ctx( program, this ); + colm_run_program( program, baseN + id->includePaths.length(), argv ); + id->streamFileNames.append( colm_extract_fns( program ) ); + + int length = 0; + const char *err = colm_error( program, &length ); + if ( err != 0 ) { + // std::cout << "error" << std::endl; + id->error_plain() << string( err, length ) << std::endl; + } + + colm_delete_program( program ); + + curFileName = prevCurFileName; + + delete[] argv; +} diff --git a/libfsm/reducer.h b/libfsm/reducer.h new file mode 100644 index 00000000..0d0f1af1 --- /dev/null +++ b/libfsm/reducer.h @@ -0,0 +1,120 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of 
charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <colm/pdarun.h> +#include <colm/bytecode.h> +#include <colm/defs.h> +#include <colm/input.h> +#include <colm/tree.h> +#include <colm/program.h> +#include <colm/colm.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include <iostream> +#include <vector> +#include <string> + +#include "vector.h" +#include "inputdata.h" +#include "parsedata.h" + +#ifndef _REDUCER_H +#define _REDUCER_H + +char *unescape( const char *s, int slen ); +char *unescape( const char *s ); + +struct SectionPass; + +struct TopLevel +{ + TopLevel( struct colm_sections *frontendSections, InputData *id, const HostLang *hostLang, + MinimizeLevel minimizeLevel, MinimizeOpt minimizeOpt ) + : + frontendSections(frontendSections), + id(id), + section(0), + pd(0), + machineSpec(0), + machineName(0), + includeDepth(0), + hostLang(hostLang), + minimizeLevel(minimizeLevel), + minimizeOpt(minimizeOpt), + + /* Should be passed into the load, somehow. */ + targetMachine(0), + searchMachine(0), + paramList(0), + success(true), + isImport(false) + { + exportContext.append( false ); + } + + struct colm_sections *frontendSections; + InputData *id; + Section *section; + SectionPass *sectionPass; + ParseData *pd; + char *machineSpec; + char *machineName; + int includeDepth; + const HostLang *hostLang; + MinimizeLevel minimizeLevel; + MinimizeOpt minimizeOpt; + std::vector<std::string> writeArgs; + + /* Should this go in the parse data? Probably. */ + Vector<bool> exportContext; + + const char *curFileName; + + const char *targetMachine; + const char *searchMachine; + + ActionParamList *paramList; + bool success; + + /* Generated and called by colm. 
*/ + void commit_reduce_forward( program_t *prg, tree_t **root, + struct pda_run *pda_run, parse_tree_t *pt ); + void read_reduce_forward( program_t *prg, FILE *file ); + + void loadMachineName( string data ); + void tryMachineDef( const InputLoc &loc, std::string name, + MachineDef *machineDef, bool isInstance ); + long tryLongScan( const InputLoc &loc, const char *data ); + void include( const InputLoc &incLoc, bool fileSpecified, string fileName, string machine ); + void reduceFile( const char *cmd, const char *inputFileName ); + + void import( const InputLoc &loc, std::string name, Literal *literal ); + void importFile( std::string fileName ); + + bool isImport; +}; + +#endif diff --git a/libfsm/rlscan.h b/libfsm/rlscan.h new file mode 100644 index 00000000..e8b4047d --- /dev/null +++ b/libfsm/rlscan.h @@ -0,0 +1,136 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _RLSCAN_H +#define _RLSCAN_H + +#include <iostream> +#include "rlscan.h" +#include "vector.h" +#ifdef WITH_RAGEL_KELBT +#include "rlparse.h" +#endif +#include "parsedata.h" +#include "avltree.h" +#include "vector.h" + +using std::istream; +using std::ostream; + +extern char *Parser6_lelNames[]; +struct Section; + +struct Scanner +{ + Scanner( InputData *id, const char *fileName, istream &input, + Parser6 *inclToParser, char *inclSectionTarg, + int includeDepth, bool importMachines ) + : + id(id), fileName(fileName), + input(input), + inclToParser(inclToParser), + inclSectionTarg(inclSectionTarg), + includeDepth(includeDepth), + importMachines(importMachines), + cur_token(0), + line(1), column(1), lastnl(0), + parser(0), ignoreSection(false), + parserExistsError(false), + whitespaceOn(true), + lastToken(0), + section(0), + sectionPass(false) + {} + + void handleMachine(); + void handleInclude(); + void handleImport(); + + void init(); + void token( int type, char *start, char *end ); + void token( int type, char c ); + void token( int type ); + void processToken( int type, char *tokdata, int toklen ); + void directToParser( Parser6 *toParser, const char *tokFileName, int tokLine, + int tokColumn, int type, char *tokdata, int toklen ); + void flushImport( ); + void importToken( int type, char *start, char *end ); + void pass( int token, char *start, char *end ); + void pass(); + void updateCol(); + void startSection(); + void endSection(); + void do_scan(); + bool active(); + InputLoc scan_loc(); + + InputData *id; + const char *fileName; + istream &input; + Parser6 *inclToParser; + char *inclSectionTarg; + int includeDepth; + bool importMachines; + + /* For import parsing. */ + int tok_cs, tok_act; + int *tok_ts, *tok_te; + int cur_token; + static const int max_tokens = 32; + int token_data[max_tokens]; + char *token_strings[max_tokens]; + int token_lens[max_tokens]; + + /* For section processing. 
*/ + int cs; + char *word, *lit; + int word_len, lit_len; + + /* For character scanning. */ + int line; + InputLoc sectionLoc; + char *ts, *te; + int column; + char *lastnl; + + /* Set by machine statements, these persist from section to section + * allowing for unnamed sections. */ + Parser6 *parser; + bool ignoreSection; + + /* This is set if ragel has already emitted an error stating that + * no section name has been seen and thus no parser exists. */ + bool parserExistsError; + + /* This is for inline code. By default it is on. It goes off for + * statements and values in inline blocks which are parsed. */ + bool whitespaceOn; + + /* Keeps a record of the previous token sent to the section parser. */ + int lastToken; + + Section *section; + bool sectionPass; + +}; + +#endif diff --git a/libfsm/switch.cc b/libfsm/switch.cc new file mode 100644 index 00000000..076f3585 --- /dev/null +++ b/libfsm/switch.cc @@ -0,0 +1,1036 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "switch.h" +#include "redfsm.h" +#include "gendata.h" + +#include <assert.h> + +std::ostream &Switch::TRANS_GOTO( int off, RedTransAp *trans ) +{ + out << "_trans = " << off << ";\n"; + return out; +} + +void Switch::RANGE_B_SEARCH( RedStateAp *state, Key lower, Key upper, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; + RedTransEl *data = state->outRange.data; + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet. */ + bool limitLow = keyOps->eq( data[mid].lowKey, lower ); + bool limitHigh = keyOps->eq( data[mid].highKey, upper ); + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid. */ + out << "if ( " << GET_KEY() << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + RANGE_B_SEARCH( state, lower, keyOps->sub( data[mid].lowKey, 1 ), low, mid-1 ); + out << "} else if ( " << GET_KEY() << " > " << + KEY(data[mid].highKey) << " ) {\n"; + RANGE_B_SEARCH( state, keyOps->add( data[mid].highKey, 1 ), upper, mid+1, high ); + out << "} else {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value ) << "\n"; + out << "}\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. */ + out << "if ( " << GET_KEY() << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + RANGE_B_SEARCH( state, lower, keyOps->sub( data[mid].lowKey, 1 ), low, mid-1 ); + + /* if the higher is the highest in the alphabet then there is no + * sense testing it. 
*/ + if ( limitHigh ) { + out << "} else {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + } + else { + out << "} else if ( " << GET_KEY() << " <= " << + KEY(data[mid].highKey) << " ) {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + + out << "else {\n"; + DEFAULT( state ); + out << "}\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. */ + out << "if ( " << GET_KEY() << " > " << + KEY(data[mid].highKey) << " ) {\n"; + RANGE_B_SEARCH( state, keyOps->add( data[mid].highKey, 1 ), upper, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( limitLow ) { + out << "} else {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + } + else { + out << "} else if ( " << GET_KEY() << " >= " << + KEY(data[mid].lowKey) << " ) {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + + out << "else {\n"; + DEFAULT( state ); + out << "}\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. 
*/ + if ( !limitLow && !limitHigh ) { + out << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_KEY() << " && " << GET_KEY() << " <= " << + KEY(data[mid].highKey) << " ) {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + + out << "else {\n"; + DEFAULT( state ); + out << "}\n"; + } + else if ( limitLow && !limitHigh ) { + out << "if ( " << GET_KEY() << " <= " << + KEY(data[mid].highKey) << " ) {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + + out << "else {\n"; + DEFAULT( state ); + out << "}\n"; + } + else if ( !limitLow && limitHigh ) { + out << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_KEY() << " ) {\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + + out << "else {\n"; + DEFAULT( state ); + out << "}\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + out << "{\n"; + TRANS_GOTO(transBase + state->outSingle.length() + (mid), data[mid].value) << "\n"; + out << "}\n"; + } + } +} + +void Switch::SINGLE_SWITCH( RedStateAp *st ) +{ + /* Load up the singles. */ + int numSingles = st->outSingle.length(); + RedTransEl *data = st->outSingle.data; + + if ( numSingles == 1 ) { + /* If there is a single single key then write it out as an if. */ + out << "\tif ( " << GET_KEY() << " == " << + KEY(data[0].lowKey) << " ) {\n\t\t"; + + /* Virtual function for writing the target of the transition. */ + TRANS_GOTO(transBase, data[0].value) << "\n"; + out << "\t}\n"; + + out << "else {\n"; + NOT_SINGLE( st ); + out << "}\n"; + } + else if ( numSingles > 1 ) { + /* Write out single keys in a switch if there is more than one. */ + out << "\tswitch( " << GET_KEY() << " ) {\n"; + + /* Write out the single indices. 
*/ + for ( int j = 0; j < numSingles; j++ ) { + out << CASE( KEY(data[j].lowKey) ) << " {\n"; + TRANS_GOTO(transBase + j, data[j].value) << "\n"; + out << CEND() << "\n}\n"; + } + + out << CodeGen::DEFAULT() << " {\n"; + NOT_SINGLE( st ); + out << CEND() << "\n}\n"; + + /* Close off the transition switch. */ + out << "\t}\n"; + } +} + +void Switch::DEFAULT( RedStateAp *st ) +{ + if ( st->defTrans != 0 ) { + TRANS_GOTO( transBase + st->outSingle.length() + st->outRange.length(), st->defTrans ) << "\n"; + } +} + +void Switch::NOT_SINGLE( RedStateAp *st ) +{ + if ( st->outRange.length() > 0 ) { + RANGE_B_SEARCH( st, keyOps->minKey, keyOps->maxKey, + 0, st->outRange.length() - 1 ); + } + else { + DEFAULT( st ); + } +} + +void Switch::LOCATE_TRANS() +{ + transBase = 0; + + out << + " switch ( " << vCS() << " ) {\n"; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st == redFsm->errState ) { + out << CASE( STR( st->id ) ) << " {\n"; + out << CEND() << "\n}\n"; + } + else { + /* Label the state. */ + out << CASE( STR( st->id ) ) << " {\n"; + + /* Try singles. */ + if ( st->outSingle.length() > 0 ) { + SINGLE_SWITCH( st ); + } + else { + NOT_SINGLE( st ); + } + + out << CEND() << "\n}\n"; + } + + transBase += st->outSingle.length() + + st->outRange.length() + + ( st->defTrans != 0 ? 1 : 0 ); + } + + out << + " }\n" + "\n"; +} + +void Switch::genAnalysis() +{ + redFsm->sortByStateId(); + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Choose the singles. */ + redFsm->moveSelectTransToSingle(); + + if ( redFsm->errState != 0 ) + redFsm->getErrorCond(); + + /* If any errors have occurred in the input file then don't write anything. */ + if ( red->id->errorCount > 0 ) + return; + + /* Analyze Machine will find the final action reference counts, among other + * things. We will use these in reporting the usage of fsm directives in + * action code. 
*/ + red->analyzeMachine(); + + setKeyType(); + + /* Run the analysis pass over the table data. */ + setTableState( TableArray::AnalyzePass ); + tableDataPass(); + + /* Switch the tables over to the code gen mode. */ + setTableState( TableArray::GeneratePass ); +} + + +void Switch::tableDataPass() +{ + if ( type == Loop ) + taActions(); + + taKeyOffsets(); + taSingleLens(); + taRangeLens(); + taIndexOffsets(); + taIndices(); + + taTransCondSpacesWi(); + taTransOffsetsWi(); + taTransLengthsWi(); + + taTransCondSpaces(); + taTransOffsets(); + taTransLengths(); + + taCondTargs(); + taCondActions(); + + taToStateActions(); + taFromStateActions(); + taEofActions(); + taEofConds(); + taEofTrans(); + + taKeys(); + taCondKeys(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); +} + +void Switch::writeData() +{ + if ( type == Loop ) { + /* If there are any transition functions then output the array. If there + * are none, don't bother emitting an empty array that won't be used. 
*/ + if ( redFsm->anyActions() ) + taActions(); + } + + taKeyOffsets(); + taKeys(); + taSingleLens(); + taRangeLens(); + taIndexOffsets(); + + taTransCondSpaces(); + taTransOffsets(); + taTransLengths(); + + taCondKeys(); + taCondTargs(); + taCondActions(); + + if ( redFsm->anyToStateActions() ) + taToStateActions(); + + if ( redFsm->anyFromStateActions() ) + taFromStateActions(); + + if ( redFsm->anyEofActions() ) + taEofActions(); + + taEofConds(); + + if ( redFsm->anyEofTrans() ) + taEofTrans(); + + taNfaTargs(); + taNfaOffsets(); + taNfaPushActions(); + taNfaPopTrans(); + + STATE_IDS(); +} + + +void Switch::setKeyType() +{ + transKeys.setType( ALPH_TYPE(), alphType->size, alphType->isChar ); + transKeys.isSigned = keyOps->isSigned; +} + +void Switch::setTableState( TableArray::State state ) +{ + for ( ArrayVector::Iter i = arrayVector; i.lte(); i++ ) { + TableArray *tableArray = *i; + tableArray->setState( state ); + } +} + +void Switch::taKeyOffsets() +{ + keyOffsets.start(); + + int curKeyOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + keyOffsets.value( curKeyOffset ); + curKeyOffset += st->outSingle.length() + st->outRange.length() * 2; + } + + keyOffsets.finish(); +} + + +void Switch::taSingleLens() +{ + singleLens.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + singleLens.value( st->outSingle.length() ); + + singleLens.finish(); +} + + +void Switch::taRangeLens() +{ + rangeLens.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + rangeLens.value( st->outRange.length() ); + + rangeLens.finish(); +} + +void Switch::taIndexOffsets() +{ + indexOffsets.start(); + + int curIndOffset = 0; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the index offset. */ + indexOffsets.value( curIndOffset ); + + /* Move the index offset ahead. 
*/ + curIndOffset += st->outSingle.length() + st->outRange.length(); + if ( st->defTrans != 0 ) + curIndOffset += 1; + } + + indexOffsets.finish(); +} + +/* One TO_STATE_ACTION call per state, in state-list order. */ +void Switch::taToStateActions() +{ + toStateActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + TO_STATE_ACTION(st); + + toStateActions.finish(); +} + +/* One FROM_STATE_ACTION call per state, in state-list order. */ +void Switch::taFromStateActions() +{ + fromStateActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + FROM_STATE_ACTION(st); + + fromStateActions.finish(); +} + +/* One EOF_ACTION call per state, in state-list order. */ +void Switch::taEofActions() +{ + eofActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + EOF_ACTION( st ); + + eofActions.finish(); +} + +/* Fill the EOF condition tables: spaces, key offsets, key lengths and keys. + * States without an outCondSpace get -1 as their space and 0 as their offset. */ +void Switch::taEofConds() +{ + /* + * EOF Cond Spaces + */ + eofCondSpaces.start(); + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->outCondSpace != 0 ) + eofCondSpaces.value( st->outCondSpace->condSpaceId ); + else + eofCondSpaces.value( -1 ); + } + eofCondSpaces.finish(); + + /* + * EOF Cond Key Offsets + */ + eofCondKeyOffs.start(); + + int curOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long off = 0; + if ( st->outCondSpace != 0 ) { + off = curOffset; + curOffset += st->outCondKeys.length(); + } + eofCondKeyOffs.value( off ); + } + + eofCondKeyOffs.finish(); + + /* + * EOF Cond Key Lengths. 
+ */ + eofCondKeyLens.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long len = 0; + if ( st->outCondSpace != 0 ) + len = st->outCondKeys.length(); + eofCondKeyLens.value( len ); + } + + eofCondKeyLens.finish(); + + /* + * EOF Cond Keys + */ + eofCondKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->outCondSpace != 0 ) { + for ( int c = 0; c < st->outCondKeys.length(); c++ ) { + CondKey key = st->outCondKeys[c]; + eofCondKeys.value( key.getVal() ); + } + } + } + + eofCondKeys.finish(); +} + +void Switch::taEofTrans() +{ + eofTrans.start(); + + /* Need to compute transition positions. */ + int totalTrans = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + totalTrans += st->outSingle.length(); + totalTrans += st->outRange.length(); + if ( st->defTrans != 0 ) + totalTrans += 1; + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + long trans = 0; + if ( st->eofTrans != 0 ) { + trans = totalTrans + 1; + totalTrans += 1; + } + + eofTrans.value( trans ); + } + + eofTrans.finish(); +} + +void Switch::taKeys() +{ + transKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Loop the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + transKeys.value( stel->lowKey.getVal() ); + } + + /* Loop the state's transitions. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + /* Lower key. */ + transKeys.value( rtel->lowKey.getVal() ); + + /* Upper key. */ + transKeys.value( rtel->highKey.getVal() ); + } + } + + transKeys.finish(); +} + +void Switch::taIndices() +{ + indices.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) + indices.value( stel->value->id ); + + /* Walk the ranges. 
*/ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) + indices.value( rtel->value->id ); + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) + indices.value( st->defTrans->id ); + } + + indices.finish(); +} + +void Switch::taTransCondSpaces() +{ + transCondSpaces.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + if ( trans->condSpace != 0 ) + transCondSpaces.value( trans->condSpace->condSpaceId ); + else + transCondSpaces.value( -1 ); + } + } + + transCondSpaces.finish(); +} + +void Switch::taTransOffsets() +{ + transOffsets.start(); + + int curOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + transOffsets.value( curOffset ); + curOffset += trans->numConds(); + } + + /* Walk the ranges. 
*/ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + transOffsets.value( curOffset ); + curOffset += trans->numConds(); + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + transOffsets.value( curOffset ); + curOffset += trans->numConds(); + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + transOffsets.value( curOffset ); + curOffset += trans->numConds(); + } + } + + errCondOffset = curOffset; + + transOffsets.finish(); +} + +void Switch::taTransLengths() +{ + transLengths.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + transLengths.value( trans->numConds() ); + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + transLengths.value( trans->numConds() ); + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + transLengths.value( trans->numConds() ); + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + transLengths.value( trans->numConds() ); + } + } + + transLengths.finish(); +} + +void Switch::taTransCondSpacesWi() +{ + transCondSpacesWi.start(); + + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + /* Cond Space id. 
*/ + if ( trans->condSpace != 0 ) + transCondSpacesWi.value( trans->condSpace->condSpaceId ); + else + transCondSpacesWi.value( -1 ); + } + + transCondSpacesWi.finish(); +} + +/* One offset per transition in redFsm->transSet: the running sum of + * numConds() over the transitions that precede it. */ +void Switch::taTransOffsetsWi() +{ + transOffsetsWi.start(); + + int curOffset = 0; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + transOffsetsWi.value( curOffset ); + curOffset += trans->numConds(); + } + + transOffsetsWi.finish(); +} + +/* One length per transition in redFsm->transSet: its numConds(). */ +void Switch::taTransLengthsWi() +{ + transLengthsWi.start(); + + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transLengthsWi.value( trans->numConds() ); + + transLengthsWi.finish(); +} + +void Switch::taCondKeys() +{ + condKeys.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + condKeys.value( key.getVal() ); + } + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + condKeys.value( key.getVal() ); + } + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + condKeys.value( key.getVal() ); + } + } + } + + /* Add any eof transitions that have not yet been written out above. 
*/ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + CondKey key = trans->outCondKey( c ); + condKeys.value( key.getVal() ); + } + } + } + + condKeys.finish(); +} + +void Switch::taCondTargs() +{ + condTargs.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + condTargs.value( cond->targ->id ); + } + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + condTargs.value( cond->targ->id ); + } + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + condTargs.value( cond->targ->id ); + } + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + condTargs.value( cond->targ->id ); + } + } + } + + if ( redFsm->errCond != 0 ) { + RedCondPair *cond = &redFsm->errCond->p; + condTargs.value( cond->targ->id ); + } + + condTargs.finish(); +} + +void Switch::taCondActions() +{ + condActions.start(); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. 
*/ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + COND_ACTION( cond ); + } + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond( c ); + COND_ACTION( cond ); + } + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + COND_ACTION( cond ); + } + } + } + + /* Add any eof transitions that have not yet been written out above. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofTrans != 0 ) { + RedTransAp *trans = st->eofTrans; + for ( int c = 0; c < trans->numConds(); c++ ) { + RedCondPair *cond = trans->outCond(c); + COND_ACTION( cond ); + } + } + } + + if ( redFsm->errCond != 0 ) { + RedCondPair *cond = &redFsm->errCond->p; + COND_ACTION( cond ); + } + + condActions.finish(); +} + +void Switch::taNfaTargs() +{ + nfaTargs.start(); + + /* Offset of zero means no NFA targs, put a filler there. */ + nfaTargs.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaTargs.value( st->nfaTargs->length() ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + nfaTargs.value( targ->state->id ); + } + } + + nfaTargs.finish(); +} + +/* These need to mirror nfa targs. 
*/ +void Switch::taNfaPushActions() +{ + nfaPushActions.start(); + + nfaPushActions.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + nfaPushActions.value( 0 ); + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_PUSH_ACTION( targ ); + } + } + + nfaPushActions.finish(); +} + +void Switch::taNfaPopTrans() +{ + nfaPopTrans.start(); + + nfaPopTrans.value( 0 ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs != 0 ) { + + nfaPopTrans.value( 0 ); + + for ( RedNfaTargs::Iter targ = *st->nfaTargs; targ.lte(); targ++ ) + NFA_POP_TEST( targ ); + } + } + + nfaPopTrans.finish(); +} + +void Switch::taNfaOffsets() +{ + nfaOffsets.start(); + + /* Offset of zero means no NFA targs, real targs start at 1. */ + long offset = 1; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->nfaTargs == 0 ) { + nfaOffsets.value( 0 ); + } + else { + nfaOffsets.value( offset ); + offset += 1 + st->nfaTargs->length(); + } + } + + nfaOffsets.finish(); +} + + +/* Write out the array of actions. */ +std::ostream &Switch::ACTIONS_ARRAY() +{ + out << "\t0, "; + int totalActions = 1; + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. */ + out << act->key.length() << ", "; + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) { + out << item->value->actionId; + if ( ! (act.last() && item.last()) ) + out << ", "; + + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +void Switch::taActions() +{ + actions.start(); + + /* Put "no-action" at the beginning. 
*/ + actions.value( 0 ); + + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. */ + actions.value( act->key.length() ); + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) + actions.value( item->value->actionId ); + } + + actions.finish(); +} + + + + diff --git a/libfsm/switch.h b/libfsm/switch.h new file mode 100644 index 00000000..7f23778b --- /dev/null +++ b/libfsm/switch.h @@ -0,0 +1,106 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _C_SWITCH_H +#define _C_SWITCH_H + +#include <iostream> +#include "codegen.h" +#include "tables.h" + +/* Forwards. 
*/ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; + +/* Table code generator built on Tables. 'type' records whether the Loop or + * the Exp variant was requested by the constructing subclass. */ +class Switch + : public virtual Tables +{ +protected: + enum Type { + Loop = 1, Exp + }; + +public: + Switch( const CodeGenArgs &args, Type type ) + : + Tables( args ), + type(type), + /* Nothing else in the constructor sets this; start from a defined value. */ + transBase(0) + {} + + std::ostream &TRANS_GOTO( int off, RedTransAp *trans ); + void RANGE_B_SEARCH( RedStateAp *state, Key lower, Key upper, int low, int high ); + void SINGLE_SWITCH( RedStateAp *st ); + void DEFAULT( RedStateAp *st ); + void NOT_SINGLE( RedStateAp *st ); + void LOCATE_TRANS(); + +protected: + Type type; + int transBase; + + std::ostream &COND_KEYS_v1(); + std::ostream &COND_SPACES_v1(); + std::ostream &INDICES(); + std::ostream &INDEX_OFFSETS(); + std::ostream &SINGLE_LENS(); + std::ostream &RANGE_LENS(); + std::ostream &TRANS_TARGS_WI(); + std::ostream &ACTIONS_ARRAY(); + + void taKeyOffsets(); + void taSingleLens(); + void taRangeLens(); + void taIndexOffsets(); + void taIndices(); + void taTransCondSpacesWi(); + void taTransOffsetsWi(); + void taTransLengthsWi(); + void taTransCondSpaces(); + void taTransOffsets(); + void taTransLengths(); + void taCondTargs(); + void taCondActions(); + void taToStateActions(); + void taFromStateActions(); + void taEofTrans(); + void taEofConds(); + void taEofActions(); + void taKeys(); + void taActions(); + void taCondKeys(); + void taNfaTargs(); + void taNfaOffsets(); + void taNfaPushActions(); + void taNfaPopTrans(); + + void setKeyType(); + + void setTableState( TableArray::State ); + + virtual void writeData(); + virtual void tableDataPass(); + virtual void genAnalysis(); +}; + +#endif diff --git a/libfsm/switchbreak.cc b/libfsm/switchbreak.cc new file mode 100644 index 00000000..567dfbc4 --- /dev/null +++ b/libfsm/switchbreak.cc @@ -0,0 +1,75 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the 
"Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "switchbreak.h" + +void SwitchBreak::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + std::stringstream success, error; + + out << + " " << ckeys << " = " << OFFSET( ARR_REF( condKeys ), ARR_REF( transOffsets ) + "[" + trans.ref() + "]" ) << ";\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( transLengths ) << "[" << trans << "];\n" + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + if ( red->condSpaceList.length() > 0 ) + COND_EXEC( ARR_REF( transCondSpaces ) + "[" + trans.ref() + "]" ); + + success << + cond << " += " << CAST( UINT() ) << "(_mid - " << ckeys << ");\n"; + + error << + cond << " = " << errCondOffset << ";\n"; + + out << + " {\n" + " " << INDEX( ARR_TYPE( condKeys ), "_lower" ) << " = " << ckeys << ";\n" + " " << INDEX( ARR_TYPE( condKeys ), "_upper" ) << " = " << ckeys << " + " << klen << " - 1;\n" + " " << INDEX( ARR_TYPE( condKeys ), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " 
if ( _upper < _lower ) {\n" + " " << error.str() << "\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << cpc << " < " << CAST("int") << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << cpc << " > " << CAST( "int" ) << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << success.str() << "\n" + " break;\n" + " }\n" + " }\n" + " }\n" + ; + } + + out << EMIT_LABEL( _match_cond ); +} + diff --git a/libfsm/switchbreak.h b/libfsm/switchbreak.h new file mode 100644 index 00000000..fdbac68c --- /dev/null +++ b/libfsm/switchbreak.h @@ -0,0 +1,70 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAGEL_SWITCHBREAK_H +#define RAGEL_SWITCHBREAK_H + +#include "switch.h" +#include "actloop.h" +#include "actexp.h" + +struct SwitchBreak +: + public Switch, public TabBreak +{ + SwitchBreak( const CodeGenArgs &args, Switch::Type type ) + : + Tables( args ), + Switch( args, type ), + TabBreak( args ) + {} + + void LOCATE_COND(); +}; + +class SwitchBreakLoop + : public SwitchBreak, public ActLoop +{ +public: + SwitchBreakLoop( const CodeGenArgs &args ) + : + Tables( args ), + SwitchBreak( args, Loop ), + ActLoop( args ) + {} +}; + + +class SwitchBreakExp + : public SwitchBreak, public ActExp +{ +public: + SwitchBreakExp( const CodeGenArgs &args ) + : + Tables( args ), + SwitchBreak( args, Exp ), + ActExp( args ) + {} +}; + + +#endif diff --git a/libfsm/switchgoto.cc b/libfsm/switchgoto.cc new file mode 100644 index 00000000..3b293c70 --- /dev/null +++ b/libfsm/switchgoto.cc @@ -0,0 +1,73 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "switchgoto.h" + +void SwitchGoto::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + std::stringstream success, error; + + out << + " " << ckeys << " = " << OFFSET( ARR_REF( condKeys ), ARR_REF( transOffsets ) + "[" + trans.ref() + "]" ) << ";\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( transLengths ) << "[" << trans << "];\n" + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + if ( red->condSpaceList.length() > 0 ) + COND_EXEC( ARR_REF( transCondSpaces ) + "[" + trans.ref() + "]" ); + + success << + cond << " += " << CAST( UINT() ) << "(_mid - " << ckeys << ");\n"; + + error << + cond << " = " << errCondOffset << ";\n"; + + out << + " {\n" + " " << INDEX( ARR_TYPE( condKeys ), "_lower" ) << " = " << ckeys << ";\n" + " " << INDEX( ARR_TYPE( condKeys ), "_upper" ) << " = " << ckeys << " + " << klen << " - 1;\n" + " " << INDEX( ARR_TYPE( condKeys ), "_mid" ) << ";\n" + " while ( " << TRUE() << " ) {\n" + " if ( _upper < _lower ) {\n" + " " << error.str() << "\n" + " break;\n" + " }\n" + "\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << cpc << " < " << CAST("int") << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << cpc << " > " << CAST( "int" ) << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << success.str() << "\n" + " break;\n" + " }\n" + " }\n" + " }\n" + ; + } +} + diff --git a/libfsm/switchgoto.h b/libfsm/switchgoto.h new file mode 100644 index 00000000..d8207325 --- /dev/null +++ b/libfsm/switchgoto.h @@ -0,0 +1,70 @@ +/* + * Copyright 2014-2018 Adrian 
Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAGEL_SWITCHGOTO_H +#define RAGEL_SWITCHGOTO_H + +#include "switch.h" +#include "actloop.h" +#include "actexp.h" + +struct SwitchGoto +: + public Switch, public TabGoto +{ + SwitchGoto( const CodeGenArgs &args, Switch::Type type ) + : + Tables( args ), + Switch( args, type ), + TabGoto( args ) + {} + + void LOCATE_COND(); +}; + +class SwitchGotoLoop + : public SwitchGoto, public ActLoop +{ +public: + SwitchGotoLoop( const CodeGenArgs &args ) + : + Tables( args ), + SwitchGoto( args, Loop ), + ActLoop( args ) + {} +}; + + +class SwitchGotoExp + : public SwitchGoto, public ActExp +{ +public: + SwitchGotoExp( const CodeGenArgs &args ) + : + Tables( args ), + SwitchGoto( args, Exp ), + ActExp( args ) + {} +}; + + +#endif diff --git a/libfsm/switchvar.cc b/libfsm/switchvar.cc new file mode 100644 index 00000000..b19f28db --- /dev/null +++ b/libfsm/switchvar.cc @@ -0,0 +1,77 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "switchvar.h" +#include "parsedata.h" +#include "inputdata.h" + +void SwitchVar::LOCATE_COND() +{ + if ( red->condSpaceList.length() > 0 ) { + std::stringstream success, error; + + out << + " " << ckeys << " = " << OFFSET( ARR_REF( condKeys ), ARR_REF( transOffsets ) + "[" + trans.ref() + "]" ) << ";\n" + " " << klen << " = " << CAST( "int" ) << ARR_REF( transLengths ) << "[" << trans << "];\n" + " " << cond << " = " << CAST( UINT() ) << ARR_REF( transOffsets ) << "[" << trans << "];\n" + "\n"; + + out << + " " << cpc << " = 0;\n"; + + if ( red->condSpaceList.length() > 0 ) + COND_EXEC( ARR_REF( transCondSpaces ) + "[" + trans.ref() + "]" ); + + success << + cond << " += " << CAST( UINT() ) << "(_mid - " << ckeys << ");\n"; + + error << + cond << " = " << errCondOffset << ";\n"; + + out << + " {\n" + " " << INDEX( ARR_TYPE( condKeys ), "_lower" ) << " = " << ckeys << ";\n" + " " << INDEX( ARR_TYPE( condKeys ), "_upper" ) << " = " << ckeys << " + " << klen << " - 1;\n" + " " << INDEX( ARR_TYPE( condKeys ), "_mid" ) << ";\n" + " _bsc = 1;\n" + " while ( _bsc == 1 ) {\n" + " if ( _upper < _lower ) {\n" + " " << error.str() << "\n" + " _bsc = 0;\n" + " }\n" + " else {\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << cpc << " < " << CAST("int") << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _upper = _mid - 1;\n" + " else if ( " << cpc << " > " << CAST( "int" ) << DEREF( ARR_REF( condKeys ), "_mid" ) << " )\n" + " _lower = _mid + 1;\n" + " else {\n" + " " << success.str() << "\n" + " _bsc = 0;\n" + " }\n" + " }\n" + " }\n" + " }\n" + ; + } +} + diff --git a/libfsm/switchvar.h b/libfsm/switchvar.h new file mode 100644 index 00000000..220963a4 --- /dev/null +++ 
b/libfsm/switchvar.h @@ -0,0 +1,72 @@ +/* + * Copyright 2014-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAGEL_SWITCHVAR_H +#define RAGEL_SWITCHVAR_H + +#include "switch.h" +#include "actloop.h" +#include "actexp.h" + +struct SwitchVar +: + public Switch, public TabVar +{ + SwitchVar( const CodeGenArgs &args, Switch::Type type ) + : + Tables( args ), + Switch( args, type ), + TabVar( args ) + {} + + void VAR_COND_BIN_SEARCH( Variable &var, TableArray &keys, std::string ok, std::string error ); + + //void LOCATE_TRANS(); + void LOCATE_COND(); +}; + +class SwitchVarLoop + : public SwitchVar, public ActLoop +{ +public: + SwitchVarLoop( const CodeGenArgs &args ) + : + Tables( args ), + SwitchVar( args, Loop ), + ActLoop( args ) + {} +}; + +class SwitchVarExp +: + public SwitchVar, public ActExp +{ +public: + SwitchVarExp( const CodeGenArgs &args ) + : + Tables( args ), + SwitchVar( args, Exp ), + ActExp( args ) + {} +}; + +#endif diff --git a/libfsm/tabbreak.cc b/libfsm/tabbreak.cc new file mode 100644 index 00000000..5ded768a --- /dev/null +++ b/libfsm/tabbreak.cc @@ -0,0 +1,378 @@ +/* + * Copyright 2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "tables.h" +#include "binary.h" +#include "flat.h" + +std::string TabBreak::BREAK( GotoLabel &label ) +{ + string ret = "break"; + if ( loopLabels ) { + ret += " "; + ret += label.ref(); + } + return ret; +} + +std::string TabBreak::CONTINUE( GotoLabel &label ) +{ + string ret = "continue"; + if ( loopLabels ) { + ret += " "; + ret += label.ref(); + } + return ret; +} + +std::string TabBreak::BREAK_LABEL( GotoLabel &label ) +{ + if ( loopLabels ) { + if ( label.isReferenced ) + return std::string(label.name) + "::\n"; + } + return ""; +} + +void TabBreak::CONTROL_JUMP( ostream &ret, bool inFinish ) +{ + ret << "if ( " << TRUE() << " ) break " << _again << ";"; +} + +void TabBreak::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << gotoDest << ";"; + CONTROL_JUMP( ret, inFinish ); + ret << CLOSE_GEN_BLOCK(); +} + +void TabBreak::GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";"; + + CONTROL_JUMP( ret, inFinish ); + ret << CLOSE_GEN_BLOCK(); +} + +void TabBreak::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << + vCS() << "; " << TOP() << " += 1;" << vCS() << " = " << + callDest << ";"; + + CONTROL_JUMP( ret, inFinish ); + ret << CLOSE_GEN_BLOCK(); +} + +void TabBreak::NCALL( ostream &ret, 
int callDest, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << + TOP() << " += 1;" << vCS() << " = " << + callDest << "; " << CLOSE_GEN_BLOCK(); +} + +void TabBreak::CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << + vCS() << "; " << TOP() << " += 1;" << vCS() << + " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, targState, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";"; + + CONTROL_JUMP( ret, inFinish ); + ret << CLOSE_GEN_BLOCK(); +} + +void TabBreak::NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << TOP() << " += 1;" << vCS() << + " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, targState, inFinish, false ); + ret << CLOSE_HOST_EXPR() << "; " << CLOSE_GEN_BLOCK(); +} + +void TabBreak::RET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << " -= 1;" << vCS() << " = " << STACK() << "[" << TOP() << "];"; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + CONTROL_JUMP( ret, inFinish ); + ret << CLOSE_GEN_BLOCK(); +} + +void 
TabBreak::NRET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << " -= 1;" << vCS() << " = " << STACK() << "[" << TOP() << "];"; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << CLOSE_GEN_BLOCK(); +} + +void TabBreak::BREAK( ostream &ret, int targState, bool csForced ) +{ + ret << + OPEN_GEN_BLOCK() << + P() << " += 1; " << + "break " << _resume << "; " << + CLOSE_GEN_BLOCK(); +} + +void TabBreak::NBREAK( ostream &ret, int targState, bool csForced ) +{ + ret << + OPEN_GEN_BLOCK() << + P() << " += 1; " << + nbreak << " = 1;" << + CLOSE_GEN_BLOCK(); +} + +void TabBreak::writeExec() +{ + out << + " {\n"; + + DECLARE( INT(), ps ); + DECLARE( INT(), cpc ); + DECLARE( INT(), nbreak ); + DECLARE( INT(), klen ); + DECLARE( INDEX( ARR_TYPE( condKeys ) ), ckeys ); + DECLARE( INDEX( ARR_TYPE( eofCondKeys ) ), cekeys ); + DECLARE( UINT(), trans, " = 0" ); + DECLARE( UINT(), cond, " = 0" ); + DECLARE( INDEX( ALPH_TYPE() ), keys ); + DECLARE( INDEX( ARR_TYPE( actions ) ), acts ); + DECLARE( INDEX( ARR_TYPE( indices ) ), inds ); + DECLARE( UINT(), nacts ); + DECLARE( INT(), have ); + DECLARE( INT(), pop_test ); + DECLARE( INT(), new_recs ); + DECLARE( INT(), alt ); + DECLARE( INT(), ic ); + + out << BREAK_LABEL( _resume ); + + /* Do we break out on no more input. 
*/ + bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates(); + if ( !noEnd ) { + if ( eof ) { + out << + " while ( " << P() << " != " << PE() << " || " << P() << " == " << vEOF() << " ) {\n"; + } + else { + out << + " while ( " << P() << " != " << PE() << " ) {\n"; + } + } + else { + out << + " while ( " << TRUE() << " ) {\n"; + + } + + NFA_PUSH( vCS() ); + + if ( loopLabels ) { + out << BREAK_LABEL( _again ); + out << "while ( " << TRUE() << " ) {\n"; + } + + FROM_STATE_ACTIONS(); + + if ( !noEnd && eof ) { + out << + "if ( " << P() << " == " << vEOF() << " ) {\n"; + + if ( redFsm->anyEofTrans() || redFsm->anyEofActions() ) { + if ( redFsm->anyEofTrans() ) { + out << + " if ( " << ARR_REF( eofTrans ) << "[" << vCS() << "] > 0 ) {\n" + " " << trans << " = " << + CAST(UINT()) << ARR_REF( eofTrans ) << "[" << vCS() << "] - 1;\n" + " }\n"; + } + } + + out << + "}\n" + "else {\n"; + } + + LOCATE_TRANS(); + + if ( !noEnd && eof ) { + out << + "}\n"; + } + + LOCATE_COND(); + + if ( redFsm->anyRegCurStateRef() ) + out << " " << ps << " = " << vCS() << ";\n"; + + string condVar = + red->condSpaceList.length() != 0 ? 
cond.ref() : trans.ref(); + + out << + " " << vCS() << " = " << CAST(INT()) << ARR_REF( condTargs ) << "[" << condVar << "];\n\n"; + + if ( redFsm->anyRegActions() ) { + out << + " if ( " << ARR_REF( condActions ) << "[" << condVar << "] != 0 ) {\n" + "\n"; + + if ( redFsm->anyRegNbreak() ) + out << " " << nbreak << " = 0;\n"; + + REG_ACTIONS( condVar ); + + if ( redFsm->anyRegNbreak() ) { + out << + " if ( " << nbreak << " == 1 )\n" + " " << BREAK( _resume ) << ";\n"; + } + + out << "}\n"; + } + + + if ( loopLabels ) { + out << BREAK( _again ) << ";\n}\n"; + } + + out << "\n" << EMIT_LABEL( _again ); + + if ( !noEnd && eof ) { + out << + " if ( " << P() << " == " << vEOF() << " ) {\n" + " if ( " << vCS() << " >= " << FIRST_FINAL_STATE() << " )\n" + " " << BREAK( _resume ) << ";\n" + " }\n" + " else {\n"; + } + + TO_STATE_ACTIONS(); + + if ( redFsm->errState != 0 ) { + out << + " if ( " << vCS() << " != " << redFsm->errState->id << " ) {\n"; + } + + out << + " " << P() << " += 1;\n" + " " << CONTINUE( _resume ) << ";\n"; + + if ( redFsm->errState != 0 ) { + out << + " }\n"; + } + + if ( !noEnd && eof ) { + out << + " }\n"; + } + + if ( redFsm->anyNfaStates() ) { + out << + " if ( nfa_len == 0 )\n" + " " << BREAK ( _resume ) << ";\n" + "\n" + " nfa_count += 1;\n" + " nfa_len -= 1;\n" + " " << P() << " = nfa_bp[nfa_len].p;\n" + ; + + if ( redFsm->bAnyNfaPops ) { + NFA_FROM_STATE_ACTION_EXEC(); + + NFA_POP_TEST_EXEC(); + + out << + " if ( " << pop_test << " )\n" + " " << vCS() << " = nfa_bp[nfa_len].state;\n" + " else\n" + " " << vCS() << " = " << ERROR_STATE() << ";\n"; + } + else { + out << + " " << vCS() << " = nfa_bp[nfa_len].state;\n"; + + } + + NFA_POST_POP(); + } + else { + out << + " " << BREAK( _resume ) << ";\n"; + } + + out << + "}\n"; + + out << EMIT_LABEL( _out ); + + out << " }\n"; +} + diff --git a/libfsm/tabgoto.cc b/libfsm/tabgoto.cc new file mode 100644 index 00000000..ca90cb9d --- /dev/null +++ b/libfsm/tabgoto.cc @@ -0,0 +1,330 @@ +/* + * Copyright 
2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "tables.h" +#include "binary.h" +#include "flat.h" + +void TabGoto::CONTROL_JUMP( ostream &ret, bool inFinish ) +{ + ret << "goto " << _again << ";"; +} + +void TabGoto::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << gotoDest << ";"; + CONTROL_JUMP( ret, inFinish ); + ret << CLOSE_GEN_BLOCK(); +} + +void TabGoto::GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";"; + + CONTROL_JUMP( ret, inFinish ); + ret << CLOSE_GEN_BLOCK(); +} + +void TabGoto::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << + vCS() << "; " << TOP() << " += 1;" << vCS() << " = " << + callDest << ";"; + + CONTROL_JUMP( ret, inFinish ); + ret << CLOSE_GEN_BLOCK(); +} + +void TabGoto::NCALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << + TOP() << " += 1;" << vCS() << " = " << + callDest << "; " << CLOSE_GEN_BLOCK(); +} + +void TabGoto::CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << + vCS() << "; " << TOP() << " += 
1;" << vCS() << + " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, targState, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";"; + + CONTROL_JUMP( ret, inFinish ); + ret << CLOSE_GEN_BLOCK(); +} + +void TabGoto::NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << vCS() << "; " << TOP() << " += 1;" << vCS() << + " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, targState, inFinish, false ); + ret << CLOSE_HOST_EXPR() << "; " << CLOSE_GEN_BLOCK(); +} + +void TabGoto::RET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << " -= 1;" << vCS() << " = " << STACK() << "[" << TOP() << "];"; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + CONTROL_JUMP( ret, inFinish ); + ret << CLOSE_GEN_BLOCK(); +} + +void TabGoto::NRET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << " -= 1;" << vCS() << " = " << STACK() << "[" << TOP() << "];"; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << CLOSE_GEN_BLOCK(); +} + +void TabGoto::BREAK( ostream &ret, int targState, bool csForced ) +{ + ret << + OPEN_GEN_BLOCK() << + P() << " += 1; " << + "goto " << _out << "; " << + CLOSE_GEN_BLOCK(); +} + +void TabGoto::NBREAK( ostream &ret, int targState, bool csForced ) +{ + ret << + OPEN_GEN_BLOCK() << + P() << " += 1; " << + nbreak << " = 1;" << + CLOSE_GEN_BLOCK(); +} + +void TabGoto::writeExec() +{ + out << + " {\n"; + + DECLARE( INT(), ps ); + DECLARE( INT(), 
cpc ); + DECLARE( INT(), nbreak ); + DECLARE( INT(), klen ); + DECLARE( INDEX( ARR_TYPE( condKeys ) ), ckeys ); + DECLARE( INDEX( ARR_TYPE( eofCondKeys ) ), cekeys ); + DECLARE( UINT(), trans, " = 0" ); + DECLARE( UINT(), cond, " = 0" ); + DECLARE( INDEX( ALPH_TYPE() ), keys ); + DECLARE( INDEX( ARR_TYPE( actions ) ), acts ); + DECLARE( INDEX( ARR_TYPE( indices ) ), inds ); + DECLARE( UINT(), nacts ); + DECLARE( INT(), pop_test ); + DECLARE( INT(), new_recs ); + DECLARE( INT(), alt ); + DECLARE( INT(), ic ); + + out << EMIT_LABEL( _resume ); + + /* Do we break out on no more input. */ + bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates(); + if ( !noEnd ) { + if ( eof ) { + out << + " if ( " << P() << " == " << PE() << " && " << P() << " != " << vEOF() << " )\n" + " goto " << _out << ";\n"; + } + else { + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto " << _out << ";\n"; + } + } + + NFA_PUSH( vCS() ); + + FROM_STATE_ACTIONS(); + + if ( !noEnd && eof ) { + out << + "if ( " << P() << " == " << vEOF() << " ) {\n"; + + if ( redFsm->anyEofTrans() || redFsm->anyEofActions() ) { + if ( redFsm->anyEofTrans() ) { + out << + " if ( " << ARR_REF( eofTrans ) << "[" << vCS() << "] > 0 ) {\n" + " " << trans << " = " << + CAST(UINT()) << ARR_REF( eofTrans ) << "[" << vCS() << "] - 1;\n" + " }\n"; + } + } + + out << + "}\n" + "else {\n"; + } + + LOCATE_TRANS(); + + if ( !noEnd && eof ) { + out << + "}\n"; + } + + LOCATE_COND(); + + if ( redFsm->anyRegCurStateRef() ) + out << " " << ps << " = " << vCS() << ";\n"; + + string condVar = + red->condSpaceList.length() != 0 ? 
cond.ref() : trans.ref(); + + out << + " " << vCS() << " = " << CAST(INT()) << ARR_REF( condTargs ) << "[" << condVar << "];\n\n"; + + if ( redFsm->anyRegActions() ) { + out << + " if ( " << ARR_REF( condActions ) << "[" << condVar << "] != 0 ) {\n" + "\n"; + + if ( redFsm->anyRegNbreak() ) + out << " " << nbreak << " = 0;\n"; + + REG_ACTIONS( condVar ); + + if ( redFsm->anyRegNbreak() ) { + out << + " if ( " << nbreak << " == 1 )\n" + " goto " << _out << ";\n"; + } + + out << "}\n"; + } + + out << "\n" << EMIT_LABEL( _again ); + + if ( !noEnd && eof ) { + out << + " if ( " << P() << " == " << vEOF() << " ) {\n" + " if ( " << vCS() << " >= " << FIRST_FINAL_STATE() << " )\n" + " goto " << _out << ";\n" + " }\n" + " else {\n"; + } + + TO_STATE_ACTIONS(); + + if ( redFsm->errState != 0 ) { + out << + " if ( " << vCS() << " != " << redFsm->errState->id << " ) {\n"; + } + + out << + " " << P() << " += 1;\n" + " goto " << _resume << ";\n"; + + if ( redFsm->errState != 0 ) { + out << + " }\n"; + } + + if ( !noEnd && eof ) { + out << + " }\n"; + } + + if ( redFsm->anyNfaStates() ) { + out << + " if ( nfa_len == 0 )\n" + " goto " << _out << ";\n" + "\n" + " nfa_count += 1;\n" + " nfa_len -= 1;\n" + " " << P() << " = nfa_bp[nfa_len].p;\n" + ; + + if ( redFsm->bAnyNfaPops ) { + NFA_FROM_STATE_ACTION_EXEC(); + + NFA_POP_TEST_EXEC(); + + out << + " if ( " << pop_test << " )\n" + " " << vCS() << " = nfa_bp[nfa_len].state;\n" + " else\n" + " " << vCS() << " = " << ERROR_STATE() << ";\n"; + } + else { + out << + " " << vCS() << " = nfa_bp[nfa_len].state;\n"; + + } + + NFA_POST_POP(); + + out << "goto " << _resume << ";\n"; + } + + out << EMIT_LABEL( _out ); + + out << " }\n"; +} + diff --git a/libfsm/tables.cc b/libfsm/tables.cc new file mode 100644 index 00000000..40edd93e --- /dev/null +++ b/libfsm/tables.cc @@ -0,0 +1,81 @@ +/* + * Copyright 2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this 
software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "tables.h" + +void Tables::CURS( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_EXPR() << ps << CLOSE_GEN_EXPR(); +} + +void Tables::TARGS( ostream &ret, bool inFinish, int targState ) +{ + ret << OPEN_GEN_EXPR() << vCS() << CLOSE_GEN_EXPR(); +} + +void Tables::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << nextDest << ";" << CLOSE_GEN_BLOCK(); +} + +void Tables::NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << "" << vCS() << " = " << OPEN_HOST_EXPR(); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";" << CLOSE_GEN_BLOCK(); +} + +void Tables::EOF_TRANS() +{ + out << + "" << trans << " = " << CAST(UINT()) << ARR_REF( eofTrans ) << "[" << vCS() << "] - 1;\n"; + + if ( red->condSpaceList.length() > 0 ) { + out << + "" << cond << " = " << CAST(UINT()) << ARR_REF( transOffsets ) << "[" << trans << "];\n"; + } +} + +void 
Tables::COND_EXEC( std::string expr ) +{ + out << + " switch ( " << expr << " ) {\n" + "\n"; + + for ( CondSpaceList::Iter csi = red->condSpaceList; csi.lte(); csi++ ) { + GenCondSpace *condSpace = csi; + out << " " << CASE( STR( condSpace->condSpaceId ) ) << " {\n"; + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = (1 << csi.pos()); + out << " ) " << cpc << " += " << condValOffset << ";\n"; + } + + out << + " " << CEND() << "\n}\n"; + } + + out << + " }\n"; +} + diff --git a/libfsm/tables.h b/libfsm/tables.h new file mode 100644 index 00000000..258f869e --- /dev/null +++ b/libfsm/tables.h @@ -0,0 +1,265 @@ +/* + * Copyright 2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _TABLES_H +#define _TABLES_H + +#include <iostream> +#include "codegen.h" + +struct Tables +: + public CodeGen +{ + Tables( const CodeGenArgs &args ) + : + CodeGen( args ), + + pa( "_pa" ), + klen( "_klen" ), + ckeys( "_ckeys" ), + cekeys( "_cekeys" ), + trans( "_trans" ), + cond( "_cond" ), + keys( "_keys" ), + acts( "_acts" ), + nacts( "_nacts" ), + inds( "_inds" ), + + cont( "_cont" ), + nfa_repeat( "_nfa_repeat" ), + nfa_test( "_nfa_test" ), + ps( "_ps" ), + nbreak( "_nbreak" ), + have( "__have" ), + ic( "_ic" ), + + _out("_out"), + _pop("_pop"), + _test_eof( "_test_eof" ), + _resume( "_resume" ), + _match_cond( "_match_cond" ), + _again( "_again" ), + _match( "_match" ), + _eof_goto( "_eof_goto" ), + + actions( "actions", *this ), + transKeys( "trans_keys", *this ), + charClass( "char_class", *this ), + flatIndexOffset( "index_offsets", *this ), + indices( "indices", *this ), + indexDefaults( "index_defaults", *this ), + transCondSpaces( "trans_cond_spaces", *this ), + transOffsets( "trans_offsets", *this ), + condTargs( "cond_targs", *this ), + condActions( "cond_actions", *this ), + toStateActions( "to_state_actions", *this ), + fromStateActions( "from_state_actions", *this ), + eofCondSpaces( "eof_cond_spaces", *this ), + eofCondKeyOffs( "eof_cond_key_offs", *this ), + eofCondKeyLens( "eof_cond_key_lens", *this ), + eofCondKeys( "eof_cond_keys", *this ), + eofActions( "eof_actions", *this ), + eofTrans( "eof_trans", *this ), + + keyOffsets( "key_offsets", *this ), + singleLens( "single_lengths", *this ), + rangeLens( "range_lengths", *this ), + indexOffsets( "index_offsets", *this ), + transCondSpacesWi( "trans_cond_spaces_wi", *this ), + transOffsetsWi( "trans_offsets_wi", *this ), + transLengthsWi( "trans_lengths_wi", *this ), + transLengths( "trans_lengths", *this ), + condKeys( "cond_keys", *this ) + {} + + Variable pa; + Variable klen; + Variable ckeys; + Variable cekeys; + Variable trans; + Variable cond; + Variable keys; + Variable 
acts; + Variable nacts; + Variable inds; + Variable cont; + Variable nfa_repeat; + Variable nfa_test; + Variable ps; + Variable nbreak; + Variable have; + Variable ic; + + GotoLabel _out; + GotoLabel _pop; + GotoLabel _test_eof; + GotoLabel _resume; + GotoLabel _match_cond; + GotoLabel _again; + GotoLabel _match; + GotoLabel _eof_goto; + + TableArray actions; + TableArray transKeys; + TableArray charClass; + TableArray flatIndexOffset; + TableArray indices; + TableArray indexDefaults; + TableArray transCondSpaces; + TableArray transOffsets; + TableArray condTargs; + TableArray condActions; + TableArray toStateActions; + TableArray fromStateActions; + TableArray eofCondSpaces; + TableArray eofCondKeyOffs; + TableArray eofCondKeyLens; + TableArray eofCondKeys; + TableArray eofActions; + TableArray eofTrans; + + TableArray keyOffsets; + TableArray singleLens; + TableArray rangeLens; + TableArray indexOffsets; + TableArray transCondSpacesWi; + TableArray transOffsetsWi; + TableArray transLengthsWi; + TableArray transLengths; + TableArray condKeys; + + int errCondOffset; + + virtual void TO_STATE_ACTION( RedStateAp *state ) = 0; + virtual void FROM_STATE_ACTION( RedStateAp *state ) = 0; + virtual void EOF_ACTION( RedStateAp *state ) = 0; + virtual void COND_ACTION( RedCondPair *cond ) = 0; + + virtual void NFA_PUSH_ACTION( RedNfaTarg *targ ) = 0; + virtual void NFA_POP_TEST( RedNfaTarg *targ ) = 0; + virtual void NFA_FROM_STATE_ACTION_EXEC() = 0; + + virtual void FROM_STATE_ACTIONS() = 0; + virtual void REG_ACTIONS( std::string cond ) = 0; + virtual void TO_STATE_ACTIONS() = 0; + virtual void EOF_ACTIONS() = 0; + + void CURS( ostream &ret, bool inFinish ); + void TARGS( ostream &ret, bool inFinish, int targState ); + void NEXT( ostream &ret, int nextDest, bool inFinish ); + void NEXT_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void EOF_TRANS(); + void COND_EXEC( std::string expr ); +}; + +struct TabGoto +: + public virtual Tables +{ + TabGoto( const 
CodeGenArgs &args ) + : + Tables( args ) + {} + + void CONTROL_JUMP( ostream &ret, bool inFinish ); + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NCALL( ostream &ret, int callDest, int targState, bool inFinish ); + void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void RET( ostream &ret, bool inFinish ); + void NRET( ostream &ret, bool inFinish ); + void BREAK( ostream &ret, int targState, bool csForced ); + void NBREAK( ostream &ret, int targState, bool csForced ); + + void NFA_POP() {} + + void writeExec(); +}; + +struct TabBreak +: + public virtual Tables +{ + TabBreak( const CodeGenArgs &args ) + : + Tables( args ), + loopLabels( args.loopLabels ) + {} + + void CONTROL_JUMP( ostream &ret, bool inFinish ); + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NCALL( ostream &ret, int callDest, int targState, bool inFinish ); + void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void RET( ostream &ret, bool inFinish ); + void NRET( ostream &ret, bool inFinish ); + void BREAK( ostream &ret, int targState, bool csForced ); + void NBREAK( ostream &ret, int targState, bool csForced ); + + void NFA_POP() {} + + void writeExec(); + + bool loopLabels; + std::string BREAK( GotoLabel &label ); + std::string CONTINUE( GotoLabel &label ); + std::string BREAK_LABEL( GotoLabel &label ); +}; + +struct TabVar +: + public virtual Tables +{ + TabVar( const CodeGenArgs &args ) + : + Tables( args ) + {} + + void GOTO( 
ostream &ret, int gotoDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NCALL( ostream &ret, int callDest, int targState, bool inFinish ); + void CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ); + void RET( ostream &ret, bool inFinish ); + void NRET( ostream &ret, bool inFinish ); + void BREAK( ostream &ret, int targState, bool csForced ); + void NBREAK( ostream &ret, int targState, bool csForced ); + + void NFA_POP() {} + + std::string BREAK( GotoLabel &label ); + std::string CONTINUE( GotoLabel &label ); + std::string BREAK_LABEL( GotoLabel &label ); + + void writeExec(); +}; + + +#endif diff --git a/libfsm/tabvar.cc b/libfsm/tabvar.cc new file mode 100644 index 00000000..02bd7b55 --- /dev/null +++ b/libfsm/tabvar.cc @@ -0,0 +1,332 @@ +/* + * Copyright 2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "tables.h" +#include "flatvar.h" +#include "binvar.h" + +std::string TabVar::BREAK( GotoLabel &label ) +{ + return "{ _cont = 0; _again = 0; }"; +} + +std::string TabVar::CONTINUE( GotoLabel &label ) +{ + return "{ _cont = 0; _again = 1; }"; +} + +std::string TabVar::BREAK_LABEL( GotoLabel &label ) +{ + return ""; +} + +void TabVar::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << gotoDest << ";" << CLOSE_GEN_BLOCK(); +} + +void TabVar::GOTO_EXPR( ostream &ret, GenInlineItem *ilItem, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << vCS() << " = " << OPEN_HOST_EXPR( "-", 1 ); + INLINE_LIST( ret, ilItem->children, 0, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";" << CLOSE_GEN_BLOCK(); +} + +void TabVar::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + red->id->error() << "cannot use fcall in -B mode" << std::endl; + red->id->abortCompile( 1 ); +} + +void TabVar::NCALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if ( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << + vCS() << "; " << TOP() << " += 1;" << vCS() << " = " << + callDest << ";" << CLOSE_GEN_BLOCK(); +} + +void TabVar::CALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + red->id->error() << "cannot use fcall in -B mode" << std::endl; + red->id->abortCompile( 1 ); +} + +void TabVar::NCALL_EXPR( ostream &ret, GenInlineItem *ilItem, int targState, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK(); + + if 
( red->prePushExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->prePushExpr ); + INLINE_LIST( ret, red->prePushExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << STACK() << "[" << TOP() << "] = " << + vCS() << "; " << TOP() << " += 1;" << vCS() << + " = " << OPEN_HOST_EXPR( "-", 1 ); + INLINE_LIST( ret, ilItem->children, targState, inFinish, false ); + ret << CLOSE_HOST_EXPR() << ";" << CLOSE_GEN_BLOCK(); +} + +void TabVar::RET( ostream &ret, bool inFinish ) +{ + red->id->error() << "cannot use fret in -B mode" << std::endl; + red->id->abortCompile( 1 ); +} + +void TabVar::NRET( ostream &ret, bool inFinish ) +{ + ret << OPEN_GEN_BLOCK() << TOP() << "-= 1;" << vCS() << " = " << + STACK() << "[" << TOP() << "]; "; + + if ( red->postPopExpr != 0 ) { + ret << OPEN_HOST_BLOCK( red->postPopExpr ); + INLINE_LIST( ret, red->postPopExpr->inlineList, 0, false, false ); + ret << CLOSE_HOST_BLOCK(); + } + + ret << CLOSE_GEN_BLOCK(); +} + +void TabVar::BREAK( ostream &ret, int targState, bool csForced ) +{ + red->id->error() << "cannot use fbreak in -B mode" << std::endl; + red->id->abortCompile( 1 ); +} + +void TabVar::NBREAK( ostream &ret, int targState, bool csForced ) +{ + ret << + OPEN_GEN_BLOCK() << + P() << "+= 1;\n" << + nbreak << " = 1;" << + CLOSE_GEN_BLOCK(); +} + +void TabVar::writeExec() +{ + out << + "{\n"; + + DECLARE( INT(), ps ); + DECLARE( INT(), cpc ); + DECLARE( INT(), nbreak ); + DECLARE( INT(), klen ); + DECLARE( INDEX( ARR_TYPE( condKeys ) ), ckeys ); + DECLARE( INDEX( ARR_TYPE( eofCondKeys ) ), cekeys ); + DECLARE( UINT(), trans, " = 0" ); + DECLARE( UINT(), cond, " = 0" ); + DECLARE( INDEX( ALPH_TYPE() ), keys ); + DECLARE( INDEX( ARR_TYPE( actions ) ), acts ); + DECLARE( INDEX( ARR_TYPE( indices ) ), inds ); + DECLARE( UINT(), nacts ); + DECLARE( INT(), have ); + DECLARE( INT(), pop_test ); + DECLARE( INT(), new_recs ); + DECLARE( INT(), alt ); + DECLARE( INT(), ic ); + + out << UINT() << " _have = 0;\n"; + out << UINT() << " 
_cont = 1;\n"; + out << UINT() << " _again = 1;\n"; + out << UINT() << " _bsc = 1;\n"; + + out << BREAK_LABEL( _resume ); + + /* Do we break out on no more input. */ + bool eof = redFsm->anyEofActivity() || redFsm->anyNfaStates(); + if ( !noEnd ) { + if ( eof ) { + out << + " while ( _again == 1 && ( " << P() << " != " << PE() << " || " << P() << " == " << vEOF() << " ) ) {\n"; + } + else { + out << + " while ( _again == 1 && " << P() << " != " << PE() << " ) {\n"; + } + } + else { + out << + " while ( _again == 1 ) {\n"; + + } + + out << "_cont = 1;\n"; + out << "_again = 1;\n"; + + NFA_PUSH( vCS() ); + + FROM_STATE_ACTIONS(); + + if ( !noEnd && eof ) { + out << + "if ( " << P() << " == " << vEOF() << " ) {\n"; + + if ( redFsm->anyEofTrans() || redFsm->anyEofActions() ) { + if ( redFsm->anyEofTrans() ) { + out << + " if ( " << ARR_REF( eofTrans ) << "[" << vCS() << "] > 0 ) {\n" + " " << trans << " = " << + CAST(UINT()) << ARR_REF( eofTrans ) << "[" << vCS() << "] - 1;\n" + " }\n"; + } + } + + out << + "}\n" + "else {\n"; + } + + LOCATE_TRANS(); + + if ( !noEnd && eof ) { + out << + "}\n"; + } + + LOCATE_COND(); + + if ( redFsm->anyRegCurStateRef() ) + out << " " << ps << " = " << vCS() << ";\n"; + + string condVar = + red->condSpaceList.length() != 0 ? 
cond.ref() : trans.ref(); + + out << + " " << vCS() << " = " << CAST(INT()) << ARR_REF( condTargs ) << "[" << condVar << "];\n\n"; + + if ( redFsm->anyRegActions() ) { + out << + " if ( " << ARR_REF( condActions ) << "[" << condVar << "] != 0 ) {\n" + "\n"; + + if ( redFsm->anyRegNbreak() ) + out << " " << nbreak << " = 0;\n"; + + REG_ACTIONS( condVar ); + + if ( redFsm->anyRegNbreak() ) { + out << + " if ( " << nbreak << " == 1 )\n" + " " << BREAK( _resume ) << "\n"; + } + + out << "}\n"; + } + + out << "if ( _cont == 1 ) {\n"; + + out << "\n" << EMIT_LABEL( _again ); + + if ( !noEnd && eof ) { + out << + " if ( " << P() << " == " << vEOF() << " ) {\n" + " if ( " << vCS() << " >= " << FIRST_FINAL_STATE() << " )\n" + " " << BREAK( _resume ) << "\n" + " }\n" + " else {\n"; + } + + TO_STATE_ACTIONS(); + + if ( redFsm->errState != 0 ) { + out << + " if ( " << vCS() << " != " << redFsm->errState->id << " ) {\n"; + } + + out << + " " << P() << " += 1;\n" + " " << CONTINUE( _resume ) << "\n"; + + if ( redFsm->errState != 0 ) { + out << + " }\n"; + } + + if ( !noEnd && eof ) { + out << + " }\n"; + } + + out << "if ( _cont == 1 ) {\n"; + + if ( redFsm->anyNfaStates() ) { + out << + " if ( nfa_len == 0 )\n" + " " << BREAK ( _resume ) << "\n" + "\n"; + + out << "if ( _cont == 1 ) {\n"; + + out << + " nfa_count += 1;\n" + " nfa_len -= 1;\n" + " " << P() << " = nfa_bp[nfa_len].p;\n" + ; + + if ( redFsm->bAnyNfaPops ) { + NFA_FROM_STATE_ACTION_EXEC(); + + NFA_POP_TEST_EXEC(); + + out << + " if ( " << pop_test << " )\n" + " " << vCS() << " = nfa_bp[nfa_len].state;\n" + " else\n" + " " << vCS() << " = " << ERROR_STATE() << ";\n"; + } + else { + out << + " " << vCS() << " = nfa_bp[nfa_len].state;\n"; + + } + + NFA_POST_POP(); + + /* cont */ + out << "}\n"; + } + else { + out << + " " << BREAK( _resume ) << "\n"; + } + + /* cont */ + out << "}}\n"; + + /* P loop. */ + out << "}\n"; + + out << EMIT_LABEL( _out ); + + /* Variable decl. 
*/ + out << "}\n"; +} + diff --git a/libfsm/version.h.cmake.in b/libfsm/version.h.cmake.in new file mode 100644 index 00000000..0f9fa015 --- /dev/null +++ b/libfsm/version.h.cmake.in @@ -0,0 +1,9 @@ +/* version.h Generated from version.h.cmake.in by cmake */ + +#ifndef _RAGEL_VERSION_H +#define _RAGEL_VERSION_H + +#cmakedefine RAGEL_VERSION "@RAGEL_VERSION@" +#cmakedefine RAGEL_PUBDATE "@RAGEL_PUBDATE@" + +#endif /* _RAGEL_VERSION_H */ diff --git a/libfsm/xmlparse.kh b/libfsm/xmlparse.kh new file mode 100644 index 00000000..1b0b30ad --- /dev/null +++ b/libfsm/xmlparse.kh @@ -0,0 +1,211 @@ +/* + * Copyright 2001-2007 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _XMLPARSE_H +#define _XMLPARSE_H + +#include "vector.h" +#include "gendata.h" +#include "buffer.h" +#include <iostream> + +using std::istream; +using std::ostream; + +#define XML_BUFSIZE 4096 + +struct AttrMarker +{ + char *id; + int idLen; + char *value; + int valueLen; +}; + +struct Attribute +{ + char *id; + char *value; +}; + +typedef Vector<AttrMarker> AttrMkList; +typedef Vector<Attribute> AttrList; +struct XMLTagHashPair; + +struct XMLTag +{ + enum TagType { Open, Close }; + + XMLTag( XMLTagHashPair *tagId, TagType type ) : + tagId(tagId), type(type), + content(0), attrList(0) {} + + Attribute *findAttr( const char *id ) /* Linear strcmp search of attrList; returns 0 when attrList is null or no attribute id matches. */ + { + if ( attrList != 0 ) { + for ( AttrList::Iter attr = *attrList; attr.lte(); attr++ ) { + if ( strcmp( id, attr->id ) == 0 ) + return attr; + } + } + return 0; + } + + XMLTagHashPair *tagId; + TagType type; + + /* Content is associated with closing tags. */ + char *content; + + /* Attribute lists are associated with opening tags. */ + AttrList *attrList; +}; + + +struct XMLTagHashPair +{ + const char *name; + int id; +}; + +struct Token; + +struct GenInlineItem; +struct GenInlineList; + +struct LmSwitchVect; +struct LmSwitchAction; + +struct XmlScanner +{ + XmlScanner( const char *fileName, istream &input ); + + int scan(); + void adjustAttrPointers( int distance ); + std::ostream &error(); + + const char *fileName; + istream &input; + + /* Scanner State. 
*/ + int cs, act, have, curline, curcol; + char *ts, *te; + char *p, *pe; + int done; + + /* Token data */ + char *data; + int data_len; + int value; + AttrMkList attrMkList; + Buffer buffer; + char *tag_id_start; + int tag_id_len; + int token_col, token_line; + + char buf[XML_BUFSIZE]; +}; + + +struct XmlParser +{ + %%{ + parser XmlParser; + + token TAG_unknown, TAG_ragel, TAG_ragel_def, TAG_host, TAG_state_list, + TAG_state, TAG_trans_list, TAG_t, TAG_machine, TAG_start_state, + TAG_error_state, TAG_action_list, TAG_action_table_list, + TAG_action, TAG_action_table, TAG_alphtype, TAG_element, + TAG_getkey, TAG_state_actions, TAG_entry_points, TAG_sub_action, + TAG_cond_space_list, TAG_cond_space, TAG_cond_list, TAG_c, + TAG_exports, TAG_ex; + + # Inline block tokens. + token TAG_text, TAG_goto, TAG_call, TAG_next, TAG_goto_expr, + TAG_call_expr, TAG_next_expr, TAG_ret, TAG_pchar, TAG_char, + TAG_hold, TAG_exec, TAG_curs, TAG_targs, TAG_entry, TAG_data, + TAG_lm_switch, TAG_init_act, TAG_set_act, TAG_set_tokend, + TAG_get_tokend, TAG_init_tokstart, TAG_set_tokstart; + + token TAG_write, TAG_access, TAG_break, TAG_arg, TAG_cs_expr; + + token TAG_p_expr, TAG_pe_expr, TAG_eof_expr, TAG_cs_expr, TAG_top_expr, + TAG_stack_expr, TAG_act_expr, TAG_tokstart_expr, TAG_tokend_expr, + TAG_data_expr, TAG_prepush, TAG_postpop, TAG_eof_t; + }%% + + %% write instance_data; + + void init(); + int parseLangEl( int type, const Token *token ); + + XmlParser( const char *sourceFileName, const char *xmlFileName, bool outputActive, bool wantComplete ) : + sourceFileName(sourceFileName), + fileName(xmlFileName), + outStream(0), + outputActive(outputActive), + wantComplete(wantComplete), + cgd(0) { } + + int token( int tokenId, Token &token ); + int token( int tokenId, int col, int line ); + int token( XMLTag *tag, int col, int line ); + + void openOutput(); + + /* Report an error encountered by the parser. 
*/ + ostream &warning( const InputLoc &loc ); + ostream &error(); + ostream &error( const InputLoc &loc ); + ostream &parser_error( int tokId, Token &token ); + ostream &source_error( const InputLoc &loc ); + + /* The name of the root section, this does not change during an include. */ + const char *sourceFileName; + const char *fileName; + ostream *outStream; + bool outputActive; + bool wantComplete; + + /* Collected during parsing. */ + char *attrKey; + char *attrValue; + int curAction; + int curActionTable; + int curTrans; + int curState; + int curCondSpace; + int curStateCond; + + CodeGenData *cgd; + CodeGenMap codeGenMap; + + Vector <char*> writeOptions; +}; + +%% write token_defs; + +int xml_parse( std::istream &input, const char *fileName, + bool outputActive, bool wantComplete, + XmlScanner &scanner, XmlParser &parser ); + +#endif diff --git a/libfsm/xmlparse.kl b/libfsm/xmlparse.kl new file mode 100644 index 00000000..04d95b83 --- /dev/null +++ b/libfsm/xmlparse.kl @@ -0,0 +1,1006 @@ +/* + * Copyright 2001-2007 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "xmlparse.h" +#include "common.h" +#include "gendata.h" +#include "version.h" +#include <iostream> +#include <stdlib.h> + +using std::ostream; +using std::istream; +using std::cerr; +using std::endl; + +Key readKey( char *td, char **end ); +long readOffsetPtr( char *td, char **end ); +unsigned long readLength( char *td ); + +struct Token +{ + XMLTag *tag; + InputLoc loc; +}; + +%%{ + +parser XmlParser; + +include "xmlparse.kh"; + +start: tag_ragel; +start: + final { + /* If we get no input the assumption is that the frontend died and + * emitted an error. This forces the backend to return a non-zero + * exit status, but does not print an error. */ + gblErrorCount += 1; + }; + +tag_ragel: tag_ragel_head ragel_def_list host_or_write_list '/' TAG_ragel; + +tag_ragel_head: TAG_ragel + final { + /* Check version used to generated the intermediate file. */ + Attribute *versionAttr = $1->tag->findAttr( "version" ); + if ( versionAttr == 0 ) + error($1->loc) << "tag <ragel> requires a version attribute" << endp; + if ( strcmp( versionAttr->value, VERSION ) != 0 ) + error($1->loc) << "version mismatch between frontend and backend" << endp; + + /* Check for file name attribute. */ + Attribute *fileNameAttr = $1->tag->findAttr( "filename" ); + if ( fileNameAttr == 0 ) + error($1->loc) << "tag <ragel> requires a filename attribute" << endp; + sourceFileName = fileNameAttr->value; + + /* Check for language attribute. 
*/ + Attribute *langAttr = $1->tag->findAttr( "lang" ); + if ( langAttr == 0 ) + error($1->loc) << "tag <ragel> requires a lang attribute" << endp; + + if ( generateDot ) + outStream = dotOpenOutput( sourceFileName ); + else if ( strcmp( langAttr->value, "C" ) == 0 ) { + hostLang = &hostLangC; + outStream = cdOpenOutput( sourceFileName ); + } + else if ( strcmp( langAttr->value, "D" ) == 0 ) { + hostLang = &hostLangD; + outStream = cdOpenOutput( sourceFileName ); + } + else if ( strcmp( langAttr->value, "Java" ) == 0 ) { + hostLang = &hostLangJava; + outStream = javaOpenOutput( sourceFileName ); + } + else if ( strcmp( langAttr->value, "Ruby" ) == 0 ) { + hostLang = &hostLangRuby; + outStream = rubyOpenOutput( sourceFileName ); + } + else if ( strcmp( langAttr->value, "C#" ) == 0 ) { + hostLang = &hostLangCSharp; + outStream = csharpOpenOutput( sourceFileName ); + } + else { + error($1->loc) << "expecting lang attribute to be " + "one of C, D, Java, Ruby or C#" << endp; + } + }; + +ragel_def_list: ragel_def_list ragel_def; +ragel_def_list: ; + +host_or_write_list: host_or_write_list host_or_write; +host_or_write_list: ; + +host_or_write: tag_host; +host_or_write: tag_write; + +tag_host: + TAG_host '/' TAG_host + final { + Attribute *lineAttr = $1->tag->findAttr( "line" ); + if ( lineAttr == 0 ) + error($1->loc) << "tag <host> requires a line attribute" << endp; + else { + int line = atoi( lineAttr->value ); + if ( outputActive ) + lineDirective( *outStream, sourceFileName, line ); + } + + if ( outputActive ) + *outStream << $3->tag->content; + }; + +ragel_def: + tag_ragel_def_head ragel_def_item_list '/' TAG_ragel_def + final { + /* Do this before distributing transitions out to singles and defaults + * makes life easier. */ + cgd->redFsm->maxKey = cgd->findMaxKey(); + + cgd->redFsm->assignActionLocs(); + + /* Find the first final state (The final state with the lowest id). */ + cgd->redFsm->findFirstFinState(); + + /* Call the user's callback. 
*/ + cgd->finishRagelDef(); + }; + +tag_ragel_def_head: TAG_ragel_def + final { + char *fsmName = 0; + Attribute *nameAttr = $1->tag->findAttr( "name" ); + if ( nameAttr != 0 ) { + fsmName = nameAttr->value; + + CodeGenMapEl *mapEl = codeGenMap.find( fsmName ); + if ( mapEl != 0 ) + cgd = mapEl->value; + else { + cgd = makeCodeGen( sourceFileName, fsmName, *outStream, wantComplete ); + codeGenMap.insert( fsmName, cgd ); + } + } + else { + cgd = makeCodeGen( sourceFileName, fsmName, + *outStream, wantComplete ); + } + + ::keyOps = &cgd->thisKeyOps; + }; + +ragel_def_item_list: ragel_def_item_list ragel_def_item; +ragel_def_item_list: ; + +ragel_def_item: tag_alph_type; +ragel_def_item: tag_getkey_expr; +ragel_def_item: tag_access_expr; +ragel_def_item: tag_prepush_expr; +ragel_def_item: tag_postpop_expr; +ragel_def_item: tag_export_list; +ragel_def_item: tag_machine; +ragel_def_item: tag_p_expr; +ragel_def_item: tag_pe_expr; +ragel_def_item: tag_eof_expr; +ragel_def_item: tag_cs_expr; +ragel_def_item: tag_top_expr; +ragel_def_item: tag_stack_expr; +ragel_def_item: tag_act_expr; +ragel_def_item: tag_tokstart_expr; +ragel_def_item: tag_tokend_expr; +ragel_def_item: tag_data_expr; + +tag_export_list: TAG_exports export_list '/' TAG_exports; + +export_list: export_list tag_export; +export_list: ; + +tag_export: TAG_ex '/' TAG_ex + final { + Attribute *nameAttr = $1->tag->findAttr( "name" ); + if ( nameAttr == 0 ) + error($1->loc) << "tag <ex> requires a name attribute" << endp; + else { + char *td = $3->tag->content; + Key exportKey = readKey( td, &td ); + cgd->exportList.append( new Export( nameAttr->value, exportKey ) ); + } + }; + +tag_alph_type: TAG_alphtype '/' TAG_alphtype + final { + if ( ! 
cgd->setAlphType( $3->tag->content ) ) + error($1->loc) << "tag <alphtype> specifies unknown alphabet type" << endp; + }; + +tag_getkey_expr: TAG_getkey inline_list '/' TAG_getkey + final { + cgd->getKeyExpr = $2->inlineList; + }; + +tag_access_expr: TAG_access inline_list '/' TAG_access + final { + cgd->accessExpr = $2->inlineList; + }; + +tag_prepush_expr: TAG_prepush inline_list '/' TAG_prepush + final { + cgd->prePushExpr = $2->inlineList; + }; + +tag_postpop_expr: TAG_postpop inline_list '/' TAG_postpop + final { + cgd->postPopExpr = $2->inlineList; + }; + +tag_p_expr: TAG_p_expr inline_list '/' TAG_p_expr + final { cgd->pExpr = $2->inlineList; }; +tag_pe_expr: TAG_pe_expr inline_list '/' TAG_pe_expr + final { cgd->peExpr = $2->inlineList; }; +tag_eof_expr: TAG_eof_expr inline_list '/' TAG_eof_expr + final { cgd->eofExpr = $2->inlineList; }; +tag_cs_expr: TAG_cs_expr inline_list '/' TAG_cs_expr + final { cgd->csExpr = $2->inlineList; }; +tag_top_expr: TAG_top_expr inline_list '/' TAG_top_expr + final { cgd->topExpr = $2->inlineList; }; +tag_stack_expr: TAG_stack_expr inline_list '/' TAG_stack_expr + final { cgd->stackExpr = $2->inlineList; }; +tag_act_expr: TAG_act_expr inline_list '/' TAG_act_expr + final { cgd->actExpr = $2->inlineList; }; +tag_tokstart_expr: TAG_tokstart_expr inline_list '/' TAG_tokstart_expr + final { cgd->tokstartExpr = $2->inlineList; }; +tag_tokend_expr: TAG_tokend_expr inline_list '/' TAG_tokend_expr + final { cgd->tokendExpr = $2->inlineList; }; +tag_data_expr: TAG_data_expr inline_list '/' TAG_data_expr + final { cgd->dataExpr = $2->inlineList; }; + + +tag_write: tag_write_head write_option_list '/' TAG_write + final { + /* Terminate the options list and call the write statement handler. */ + writeOptions.append(0); + cgd->writeStatement( $1->loc, writeOptions.length()-1, writeOptions.data ); + + /* Clear the options in prep for the next write statement. 
*/ + writeOptions.empty(); + }; + +nonterm tag_write_head +{ + InputLoc loc; +}; + +tag_write_head: TAG_write + final { + Attribute *nameAttr = $1->tag->findAttr( "def_name" ); + Attribute *lineAttr = $1->tag->findAttr( "line" ); + Attribute *colAttr = $1->tag->findAttr( "col" ); + + if ( nameAttr == 0 ) + error($1->loc) << "tag <write> requires a def_name attribute" << endp; + if ( lineAttr == 0 ) + error($1->loc) << "tag <write> requires a line attribute" << endp; + if ( colAttr == 0 ) + error($1->loc) << "tag <write> requires a col attribute" << endp; + + if ( nameAttr != 0 && lineAttr != 0 && colAttr != 0 ) { + $$->loc.line = atoi(lineAttr->value); + $$->loc.col = atoi(colAttr->value); + + CodeGenMapEl *mapEl = codeGenMap.find( nameAttr->value ); + if ( mapEl == 0 ) { + source_error($$->loc) << "write statement given " + "but there are no machine instantiations" << endp; + } + else { + cgd = mapEl->value; + ::keyOps = &cgd->thisKeyOps; + } + } + }; + + +write_option_list: write_option_list tag_arg; +write_option_list: ; + +nonterm tag_arg +{ + char *option; +}; + +tag_arg: TAG_arg '/' TAG_arg + final { + writeOptions.append( $3->tag->content ); + }; + +tag_machine: tag_machine_head machine_item_list '/' TAG_machine + final { + cgd->closeMachine(); + }; + +tag_machine_head: TAG_machine + final { + cgd->createMachine(); + }; + +machine_item_list: machine_item_list machine_item; +machine_item_list: ; + +machine_item: tag_start_state; +machine_item: tag_error_state; +machine_item: tag_entry_points; +machine_item: tag_state_list; +machine_item: tag_action_list; +machine_item: tag_action_table_list; +machine_item: tag_cond_space_list; + +# +# States. 
+# + +tag_start_state: TAG_start_state '/' TAG_start_state + final { + unsigned long startState = strtoul( $3->tag->content, 0, 10 ); + cgd->setStartState( startState ); + }; + +tag_error_state: TAG_error_state '/' TAG_error_state + final { + unsigned long errorState = strtoul( $3->tag->content, 0, 10 ); + cgd->setErrorState( errorState ); + }; + +tag_entry_points: TAG_entry_points entry_point_list '/' TAG_entry_points + final { + Attribute *errorAttr = $1->tag->findAttr( "error" ); + if ( errorAttr != 0 ) + cgd->setForcedErrorState(); + }; + +entry_point_list: entry_point_list tag_entry; +entry_point_list: ; + +tag_entry: TAG_entry '/' TAG_entry + final { + Attribute *nameAttr = $1->tag->findAttr( "name" ); + if ( nameAttr == 0 ) { + error($1->loc) << "tag <entry_points>::<entry> " + "requires a name attribute" << endp; + } + else { + char *data = $3->tag->content; + unsigned long entry = strtoul( data, &data, 10 ); + cgd->addEntryPoint( nameAttr->value, entry ); + } + }; + +tag_state_list: tag_state_list_head state_list '/' TAG_state_list; + +tag_state_list_head: TAG_state_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error($1->loc) << "tag <state_list> requires a length attribute" << endp; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initStateList( length ); + curState = 0; + } + }; + +state_list: state_list tag_state; +state_list: ; + +tag_state: TAG_state state_item_list '/' TAG_state + final { + Attribute *idAttr = $1->tag->findAttr( "id" ); + if ( idAttr == 0 ) + error($1->loc) << "tag <state> requires an id attribute" << endp; + else { + int id = atoi( idAttr->value ); + cgd->setId( curState, id ); + } + + Attribute *lengthAttr = $1->tag->findAttr( "final" ); + if ( lengthAttr != 0 ) + cgd->setFinal( curState ); + curState += 1; + }; + +state_item_list: state_item_list state_item; +state_item_list: ; + +state_item: tag_state_actions; +state_item: tag_eof_t; +state_item: 
tag_state_cond_list; +state_item: tag_trans_list; + +tag_state_actions: TAG_state_actions '/' TAG_state_actions + final { + char *ad = $3->tag->content; + + long toStateAction = readOffsetPtr( ad, &ad ); + long fromStateAction = readOffsetPtr( ad, &ad ); + long eofAction = readOffsetPtr( ad, &ad ); + + cgd->setStateActions( curState, toStateAction, + fromStateAction, eofAction ); + }; + +tag_eof_t: TAG_eof_t '/' TAG_eof_t + final { + char *et = $3->tag->content; + long targ = readOffsetPtr( et, &et ); + long eofAction = readOffsetPtr( et, &et ); + + cgd->setEofTrans( curState, targ, eofAction ); + }; + +tag_state_cond_list: tag_state_cond_list_head state_cond_list '/' TAG_cond_list; + +tag_state_cond_list_head: TAG_cond_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error($1->loc) << "tag <cond_list> requires a length attribute" << endp; + else { + ulong length = readLength( lengthAttr->value ); + cgd->initStateCondList( curState, length ); + curStateCond = 0; + } + }; + +state_cond_list: state_cond_list state_cond; +state_cond_list: ; + +state_cond: TAG_c '/' TAG_c + final { + char *td = $3->tag->content; + Key lowKey = readKey( td, &td ); + Key highKey = readKey( td, &td ); + long condId = readOffsetPtr( td, &td ); + cgd->addStateCond( curState, lowKey, highKey, condId ); + }; + +tag_trans_list: tag_trans_list_head trans_list '/' TAG_trans_list + final { + cgd->finishTransList( curState ); + }; + +tag_trans_list_head: TAG_trans_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error($1->loc) << "tag <trans_list> requires a length attribute" << endp; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initTransList( curState, length ); + curTrans = 0; + } + }; + +trans_list: trans_list tag_trans; +trans_list: ; + +tag_trans: TAG_t '/' TAG_t + final { + char *td = $3->tag->content; + Key lowKey = readKey( td, &td ); + Key highKey = readKey( 
td, &td ); + long targ = readOffsetPtr( td, &td ); + long action = readOffsetPtr( td, &td ); + + cgd->newTrans( curState, curTrans++, lowKey, highKey, targ, action ); + }; + +# +# Action Lists. +# + +tag_action_list: tag_action_list_head action_list '/' TAG_action_list; + +tag_action_list_head: TAG_action_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error($1->loc) << "tag <action_list> requires a length attribute" << endp; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initActionList( length ); + curAction = 0; + } + }; + +action_list: action_list tag_action; +action_list: ; + +# +# Actions. +# + +tag_action: TAG_action inline_list '/' TAG_action + final { + Attribute *lineAttr = $1->tag->findAttr( "line" ); + Attribute *colAttr = $1->tag->findAttr( "col" ); + Attribute *nameAttr = $1->tag->findAttr( "name" ); + if ( lineAttr == 0 || colAttr == 0) + error($1->loc) << "tag <action> requires a line and col attributes" << endp; + else { + unsigned long line = strtoul( lineAttr->value, 0, 10 ); + unsigned long col = strtoul( colAttr->value, 0, 10 ); + + char *name = 0; + if ( nameAttr != 0 ) + name = nameAttr->value; + + cgd->newAction( curAction++, name, line, col, $2->inlineList ); + } + }; + +nonterm inline_list +{ + GenInlineList *inlineList; +}; + + +inline_list: inline_list inline_item + final { + /* Append the item to the list, return the list. */ + $1->inlineList->append( $2->inlineItem ); + $$->inlineList = $1->inlineList; + }; + +inline_list: + final { + /* Start with empty list. 
*/ + $$->inlineList = new GenInlineList; + }; + +nonterm inline_item_type +{ + GenInlineItem *inlineItem; +}; + +nonterm inline_item uses inline_item_type; + +inline_item: tag_text final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_goto final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_call final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_next final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_goto_expr final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_call_expr final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_next_expr final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_ret final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_break final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_pchar final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_char final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_hold final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_exec final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_curs final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_targs final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_il_entry final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_init_tokstart final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_init_act final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_get_tokend final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_set_tokstart final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_set_tokend final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_set_act final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_sub_action final { $$->inlineItem = $1->inlineItem; }; +inline_item: tag_lm_switch final { $$->inlineItem = $1->inlineItem; }; + +nonterm tag_text uses inline_item_type; +nonterm tag_goto uses inline_item_type; +nonterm tag_call uses inline_item_type; +nonterm tag_next uses inline_item_type; 
+nonterm tag_goto_expr uses inline_item_type; +nonterm tag_call_expr uses inline_item_type; +nonterm tag_next_expr uses inline_item_type; +nonterm tag_ret uses inline_item_type; +nonterm tag_break uses inline_item_type; +nonterm tag_pchar uses inline_item_type; +nonterm tag_char uses inline_item_type; +nonterm tag_hold uses inline_item_type; +nonterm tag_exec uses inline_item_type; +nonterm tag_curs uses inline_item_type; +nonterm tag_targs uses inline_item_type; +nonterm tag_il_entry uses inline_item_type; +nonterm tag_init_tokstart uses inline_item_type; +nonterm tag_init_act uses inline_item_type; +nonterm tag_get_tokend uses inline_item_type; +nonterm tag_set_tokstart uses inline_item_type; +nonterm tag_set_tokend uses inline_item_type; +nonterm tag_set_act uses inline_item_type; +nonterm tag_sub_action uses inline_item_type; +nonterm tag_lm_switch uses inline_item_type; + +tag_text: TAG_text '/' TAG_text + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Text ); + $$->inlineItem->data = $3->tag->content; + }; + +tag_goto: TAG_goto '/' TAG_goto + final { + int targ = strtol( $3->tag->content, 0, 10 ); + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Goto ); + $$->inlineItem->targId = targ; + }; + +tag_call: TAG_call '/' TAG_call + final { + int targ = strtol( $3->tag->content, 0, 10 ); + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Call ); + $$->inlineItem->targId = targ; + }; + +tag_next: TAG_next '/' TAG_next + final { + int targ = strtol( $3->tag->content, 0, 10 ); + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Next ); + $$->inlineItem->targId = targ; + }; + +tag_goto_expr: TAG_goto_expr inline_list '/' TAG_goto_expr + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::GotoExpr ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_call_expr: TAG_call_expr inline_list '/' TAG_call_expr + final { + $$->inlineItem = new GenInlineItem( InputLoc(), 
GenInlineItem::CallExpr ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_next_expr: TAG_next_expr inline_list '/' TAG_next_expr + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::NextExpr ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_ret: TAG_ret '/' TAG_ret + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Ret ); + }; + +tag_break: TAG_break '/' TAG_break + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Break ); + }; + +tag_pchar: TAG_pchar '/' TAG_pchar + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::PChar ); + }; + +tag_char: TAG_char '/' TAG_char + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Char ); + }; + +tag_hold: TAG_hold '/' TAG_hold + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Hold ); + }; + +tag_exec: TAG_exec inline_list '/' TAG_exec + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Exec ); + $$->inlineItem->children = $2->inlineList; + }; + +tag_curs: TAG_curs '/' TAG_curs + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Curs ); + }; + +tag_targs: TAG_targs '/' TAG_targs + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Targs ); + }; + +tag_il_entry: TAG_entry '/' TAG_entry + final { + int targ = strtol( $3->tag->content, 0, 10 ); + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::Entry ); + $$->inlineItem->targId = targ; + }; + +tag_init_tokstart: TAG_init_tokstart '/' TAG_init_tokstart + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmInitTokStart ); + }; + +tag_init_act: TAG_init_act '/' TAG_init_act + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmInitAct ); + }; + +tag_get_tokend: TAG_get_tokend '/' TAG_get_tokend + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmGetTokEnd ); + }; + 
+tag_set_tokstart: TAG_set_tokstart '/' TAG_set_tokstart + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmSetTokStart ); + cgd->hasLongestMatch = true; + }; + +tag_set_tokend: TAG_set_tokend '/' TAG_set_tokend + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmSetTokEnd ); + $$->inlineItem->offset = strtol( $3->tag->content, 0, 10 ); + }; + +tag_set_act: TAG_set_act '/' TAG_set_act + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmSetActId ); + $$->inlineItem->lmId = strtol( $3->tag->content, 0, 10 ); + }; + +tag_sub_action: TAG_sub_action inline_list '/' TAG_sub_action + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::SubAction ); + $$->inlineItem->children = $2->inlineList; + }; + +# Action switches. +tag_lm_switch: TAG_lm_switch lm_action_list '/' TAG_lm_switch + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::LmSwitch ); + $$->inlineItem->children = $2->inlineList; + }; + +nonterm lm_action_list +{ + GenInlineList *inlineList; +}; + +lm_action_list: lm_action_list tag_inline_action + final { + $$->inlineList = $1->inlineList; + $$->inlineList->append( $2->inlineItem ); + }; +lm_action_list: + final { + $$->inlineList = new GenInlineList; + }; + +nonterm tag_inline_action uses inline_item_type; + +tag_inline_action: TAG_sub_action inline_list '/' TAG_sub_action + final { + $$->inlineItem = new GenInlineItem( InputLoc(), GenInlineItem::SubAction ); + $$->inlineItem->children = $2->inlineList; + + Attribute *idAttr = $1->tag->findAttr( "id" ); + if ( idAttr != 0 ) { + unsigned long id = strtoul( idAttr->value, 0, 10 ); + $$->inlineItem->lmId = id; + } + }; + +# +# Lists of Actions. 
+# + +tag_action_table_list: + tag_action_table_list_head action_table_list '/' TAG_action_table_list; + +tag_action_table_list_head: TAG_action_table_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) { + error($1->loc) << "tag <action_table_list> requires " + "a length attribute" << endp; + } + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initActionTableList( length ); + curActionTable = 0; + } + }; + +action_table_list: action_table_list tag_action_table; +action_table_list: ; + +tag_action_table: TAG_action_table '/' TAG_action_table + final { + /* Find the length of the action table. */ + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error($1->loc) << "tag <at> requires a length attribute" << endp; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + + /* Collect the action table. */ + RedAction *redAct = cgd->allActionTables + curActionTable; + redAct->actListId = curActionTable; + redAct->key.setAsNew( length ); + char *ptr = $3->tag->content; + int pos = 0; + while ( *ptr != 0 ) { + unsigned long actionId = strtoul( ptr, &ptr, 10 ); + redAct->key[pos].key = 0; + redAct->key[pos].value = cgd->allActions+actionId; + pos += 1; + } + + /* Insert into the action table map. */ + cgd->redFsm->actionMap.insert( redAct ); + } + + curActionTable += 1; + }; + +# +# Conditions. 
+# + +tag_cond_space_list: tag_cond_space_list_head cond_space_list '/' TAG_cond_space_list; + +tag_cond_space_list_head: TAG_cond_space_list + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) { + error($1->loc) << "tag <cond_space_list> " + "requires a length attribute" << endp; + } + else { + ulong length = readLength( lengthAttr->value ); + cgd->initCondSpaceList( length ); + curCondSpace = 0; + } + }; + +cond_space_list: cond_space_list tag_cond_space; +cond_space_list: tag_cond_space; + +tag_cond_space: TAG_cond_space '/' TAG_cond_space + final { + Attribute *lengthAttr = $1->tag->findAttr( "length" ); + Attribute *idAttr = $1->tag->findAttr( "id" ); + if ( lengthAttr == 0 ) + error($1->loc) << "tag <cond_space> requires a length attribute" << endp; + else { + if ( lengthAttr == 0 ) + error($1->loc) << "tag <cond_space> requires an id attribute" << endp; + else { + unsigned long condSpaceId = strtoul( idAttr->value, 0, 10 ); + ulong length = readLength( lengthAttr->value ); + + char *td = $3->tag->content; + Key baseKey = readKey( td, &td ); + + cgd->newCondSpace( curCondSpace, condSpaceId, baseKey ); + for ( ulong a = 0; a < length; a++ ) { + long actionOffset = readOffsetPtr( td, &td ); + cgd->condSpaceItem( curCondSpace, actionOffset ); + } + curCondSpace += 1; + } + } + }; + +}%% + +%%{ + write types; + write data; +}%% + +void XmlParser::init() +{ + %% write init; +} + +int XmlParser::parseLangEl( int type, const Token *token ) +{ + %% write exec; + return errCount == 0 ? 
0 : -1; +} + + +unsigned long readLength( char *td ) +{ + return strtoul( td, 0, 10 ); +} + +Key readKey( char *td, char **end ) +{ + if ( keyOps->isSigned ) + return Key( strtol( td, end, 10 ) ); + else + return Key( strtoul( td, end, 10 ) ); +} + +long readOffsetPtr( char *td, char **end ) +{ + while ( *td == ' ' || *td == '\t' ) + td++; + + if ( *td == 'x' ) { + if ( end != 0 ) + *end = td + 1; + return -1; + } + + return strtol( td, end, 10 ); +} + +ostream &XmlParser::warning( const InputLoc &loc ) +{ + cerr << fileName << ":" << loc.line << ":" << loc.col << ": warning: "; + return cerr; +} + +ostream &XmlParser::error( const InputLoc &loc ) +{ + gblErrorCount += 1; + assert( fileName != 0 ); + cerr << fileName << ":" << loc.line << ":" << loc.col << ": "; + return cerr; +} + + +ostream &XmlParser::parser_error( int tokId, Token &token ) +{ + gblErrorCount += 1; + assert( fileName != 0 ); + cerr << fileName << ":" << token.loc.line << ":" << token.loc.col; + if ( token.tag != 0 ) { + if ( token.tag->tagId == 0 ) + cerr << ": at unknown tag"; + else + cerr << ": at tag <" << token.tag->tagId->name << ">"; + } + cerr << ": "; + + return cerr; +} + +ostream &XmlParser::source_error( const InputLoc &loc ) +{ + gblErrorCount += 1; + assert( sourceFileName != 0 ); + cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": "; + return cerr; +} + + +int XmlParser::token( int tokenId, Token &tok ) +{ + int res = parseLangEl( tokenId, &tok ); + if ( res < 0 ) + parser_error( tokenId, tok ) << "parse error" << endp; + return res; +} + +int XmlParser::token( int tokenId, int col, int line ) +{ + Token tok; + tok.loc.col = col; + tok.loc.line = line; + tok.tag = 0; + return token( tokenId, tok ); +} + +int XmlParser::token( XMLTag *tag, int col, int line ) +{ + Token tok; + tok.loc.col = col; + tok.loc.line = line; + tok.tag = tag; + + if ( tag->type == XMLTag::Close ) { + int res = token( '/', tok ); + if ( res < 0 ) + return res; + } + + tok.tag = tag; + return 
token( tag->tagId != 0 ? tag->tagId->id : TAG_unknown, tok ); +} diff --git a/libfsm/xmlscan.rl b/libfsm/xmlscan.rl new file mode 100644 index 00000000..4e9ee4e2 --- /dev/null +++ b/libfsm/xmlscan.rl @@ -0,0 +1,315 @@ +/* + * Copyright 2001-2007 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <iostream> +#include <string.h> +#include "vector.h" +#include "xmlparse.h" + +using std::istream; +using std::cout; +using std::cerr; +using std::endl; + +%%{ + machine XmlScanner; + write data; +}%% + +class Perfect_Hash /* declaration only; in_word_set is the tag-name lookup called from xml_parse */ +{ +private: + static inline unsigned int hash (const char *str, unsigned int len); + +public: + static struct XMLTagHashPair *in_word_set (const char *str, unsigned int len); +}; + +XmlScanner::XmlScanner( const char *fileName, istream &input ) : + fileName(fileName), + input(input), + curline(1), + curcol(1), + p(0), pe(0), + done(false), + data(0), data_len(0), + value(0) +{ + %%{ + machine XmlScanner; + write init; + }%% +} + +#define TK_NO_TOKEN (-1) +#define TK_ERR 1 +#define TK_SPACE 2 +#define TK_EOF 3 +#define TK_OpenTag 4 +#define TK_CloseTag 5 + +#define ret_tok( _tok ) token = (_tok); data = ts + +void XmlScanner::adjustAttrPointers( int distance ) /* re-bases the saved attribute pointers after scan() moves buffered data to the front of buf */ +{ + for ( AttrMkList::Iter attr = attrMkList; attr.lte(); attr++ ) { + attr->id -= distance; + attr->value -= distance; + } +} + +/* There is no claim that this is a proper XML parser, but it is good + * enough for our purposes. */ +%%{ + machine XmlScanner; + + action colup { curcol++; } + action start_tok { token_col = curcol; token_line = curline; } + NL = '\n' @{ curcol = 0; curline++; }; + + WS = [\r\t ] | NL; + id = [_a-zA-Z][_a-zA-Z0-9]*; + literal = '"' ( [^"] | NL )* '"'; + + # Attribute identifiers. 
+ action start_attr_id { attr_id_start = p; } + action leave_attr_id { attr_id_len = p - attr_id_start; } + + attr_id = id >start_attr_id %leave_attr_id; + + # Attribute values + action start_attr_value { attr_value_start = p; } + action leave_attr_value + { + attr_value_len = p - attr_value_start; + + AttrMarker newAttr; + newAttr.id = attr_id_start; + newAttr.idLen = attr_id_len; + newAttr.value = attr_value_start; + newAttr.valueLen = attr_value_len; + attrMkList.append( newAttr ); + } + + attr_value = literal >start_attr_value %leave_attr_value; + + # Attribute list. + attribute = attr_id WS* '=' WS* attr_value WS*; + + # Tag identifiers. + action tag_id_start { tag_id_start = p; } + action leave_tag_id { tag_id_len = p - tag_id_start; } + + tag_id = id >tag_id_start %leave_tag_id; + + main := |* + # Tags + ( '<' WS* tag_id ( WS+ attribute* )? '>' ) >start_tok $colup + => { ret_tok( TK_OpenTag ); fbreak; }; + + ( '<' WS* '/' WS* tag_id WS* '>' ) >start_tok $colup + => { ret_tok( TK_CloseTag ); fbreak; }; + + # Data in between tags. + ( [^<&\0] | NL ) $colup + => { buffer.append( *p ); }; + + # Specials. Entity references decode to the character they name. + "&amp;" $colup + => { buffer.append( '&' ); }; + "&lt;" $colup + => { buffer.append( '<' ); }; + "&gt;" $colup + => { buffer.append( '>' ); }; + + # EOF + 0 >start_tok => { ret_tok( TK_EOF ); fbreak; }; + + *|; +}%% + +int XmlScanner::scan( ) /* returns one TK_ code per call; text seen before the tag accumulates in buffer */ +{ + int token = TK_NO_TOKEN; + int space = 0, readlen = 0; + char *attr_id_start = 0; + char *attr_value_start = 0; + int attr_id_len = 0; + int attr_value_len = 0; + + attrMkList.empty(); + buffer.clear(); + + while ( 1 ) { + if ( p == pe ) { + //printf("scanner: need more data\n"); + + if ( ts == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. 
*/ + //printf("scanner: buffer broken mid token\n"); + have = pe - ts; + memmove( buf, ts, have ); + + int distance = ts - buf; /* every pointer into the old position moves down by this much */ + te -= distance; + tag_id_start -= distance; + attr_id_start -= distance; + attr_value_start -= distance; + adjustAttrPointers( distance ); + ts = buf; + } + + p = buf + have; + space = XML_BUFSIZE - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. */ + return TK_SPACE; + } + + if ( done ) { + //printf("scanner: end of file\n"); + p[0] = 0; /* a single NUL is fed so the machine's EOF pattern can match */ + readlen = 1; + } + else { + input.read( p, space ); + readlen = input.gcount(); + if ( input.eof() ) { + //printf("scanner: setting done flag\n"); + done = 1; + } + } + + pe = p + readlen; + } + + %% write exec; + + if ( cs == XmlScanner_error ) + return TK_ERR; + + if ( token != TK_NO_TOKEN ) { + data_len = p - data; + return token; + } + } +} + +int xml_parse( std::istream &input, const char *fileName, + bool outputActive, bool wantComplete, + XmlScanner &scanner, XmlParser &parser ) /* pumps scanner tokens into the parser until EOF or a scanner failure; always returns 0 */ +{ + while ( 1 ) { + int token = scanner.scan(); + if ( token == TK_NO_TOKEN ) { + cerr << "xmlscan: internal error: scanner returned NO_TOKEN" << endl; + exit(1); + } + else if ( token == TK_EOF ) { + parser.token( XmlParser_tk_eof, scanner.token_col, scanner.token_line ); + break; + } + else if ( token == TK_ERR ) { + scanner.error() << "scanner error" << endl; + break; + } + else if ( token == TK_SPACE ) { + scanner.error() << "scanner is out of buffer space" << endl; + break; + } + else { + /* All other tokens are either open or close tags. */ + XMLTagHashPair *tagId = Perfect_Hash::in_word_set( + scanner.tag_id_start, scanner.tag_id_len ); + + XMLTag *tag = new XMLTag( tagId, token == TK_OpenTag ? + XMLTag::Open : XMLTag::Close ); + + if ( tagId != 0 ) { + /* Get attributes for open tags. 
*/ + if ( token == TK_OpenTag && scanner.attrMkList.length() > 0 ) { + tag->attrList = new AttrList; + for ( AttrMkList::Iter attr = scanner.attrMkList; + attr.lte(); attr++ ) + { + Attribute newAttr; + newAttr.id = new char[attr->idLen+1]; + memcpy( newAttr.id, attr->id, attr->idLen ); + newAttr.id[attr->idLen] = 0; + + /* Exclude the surrounding quotes. */ + newAttr.value = new char[attr->valueLen-1]; /* valueLen counts both quotes: valueLen-2 chars plus the NUL */ + memcpy( newAttr.value, attr->value+1, attr->valueLen-2 ); + newAttr.value[attr->valueLen-2] = 0; + + tag->attrList->append( newAttr ); + } + } + + /* Get content for closing tags. */ + if ( token == TK_CloseTag ) { + switch ( tagId->id ) { + case TAG_host: case TAG_arg: + case TAG_t: case TAG_alphtype: + case TAG_text: case TAG_goto: + case TAG_call: case TAG_next: + case TAG_entry: case TAG_set_tokend: + case TAG_set_act: case TAG_start_state: + case TAG_error_state: case TAG_state_actions: + case TAG_action_table: case TAG_cond_space: + case TAG_c: case TAG_ex: case TAG_eof_t: + tag->content = new char[scanner.buffer.length+1]; + memcpy( tag->content, scanner.buffer.data, + scanner.buffer.length ); + tag->content[scanner.buffer.length] = 0; + break; + } + } + } + + #if 0 + cerr << "parser_driver: " << (tag->type == XMLTag::Open ? "open" : "close") << + ": " << (tag->tagId != 0 ? 
tag->tagId->name : "<unknown>") << endl; + if ( tag->attrList != 0 ) { + for ( AttrList::Iter attr = *tag->attrList; attr.lte(); attr++ ) + cerr << " " << attr->id << ": " << attr->value << endl; + } + if ( tag->content != 0 ) + cerr << " content: " << tag->content << endl; + #endif + + parser.token( tag, scanner.token_col, scanner.token_line ); + } + } + + return 0; +} + +std::ostream &XmlScanner::error() /* counts the error and returns cerr with the current file:line:col prefix written */ +{ + gblErrorCount += 1; + cerr << fileName << ":" << curline << ":" << curcol << ": "; + return cerr; +} diff --git a/libfsm/xmltags.gperf b/libfsm/xmltags.gperf new file mode 100644 index 00000000..1ca544f7 --- /dev/null +++ b/libfsm/xmltags.gperf @@ -0,0 +1,95 @@ +/* + * Copyright 2005 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + +%{ +#include <string.h> +#include "xmlparse.h" /* NOTE(review): presumably declares XMLTagHashPair and the TAG_ ids listed below -- confirm against xmlparse.h */ +%} +%compare-strncmp +struct XMLTagHashPair; +%% +ragel, TAG_ragel +ragel_def, TAG_ragel_def +host, TAG_host +state_list, TAG_state_list +state, TAG_state +trans_list, TAG_trans_list +t, TAG_t +machine, TAG_machine +start_state, TAG_start_state +error_state, TAG_error_state +action_list, TAG_action_list +action, TAG_action +action_table_list, TAG_action_table_list +action_table, TAG_action_table +alphtype, TAG_alphtype +getkey, TAG_getkey +state_actions, TAG_state_actions +entry_points, TAG_entry_points +text, TAG_text +goto, TAG_goto +call, TAG_call +next, TAG_next +goto_expr, TAG_goto_expr +call_expr, TAG_call_expr +next_expr, TAG_next_expr +ret, TAG_ret +pchar, TAG_pchar +char, TAG_char +hold, TAG_hold +exec, TAG_exec +curs, TAG_curs +targs, TAG_targs +entry, TAG_entry +data, TAG_data +lm_switch, TAG_lm_switch +sub_action, TAG_sub_action +init_act, TAG_init_act +set_act, TAG_set_act +get_tokend, TAG_get_tokend +set_tokend, TAG_set_tokend +init_tokstart, TAG_init_tokstart +set_tokstart, TAG_set_tokstart +write, TAG_write +access, TAG_access +break, TAG_break +arg, TAG_arg +cond_space_list, TAG_cond_space_list +cond_space, TAG_cond_space +cond_list, TAG_cond_list +c, TAG_c +exports, TAG_exports +ex, TAG_ex +p_expr, TAG_p_expr +pe_expr, TAG_pe_expr +eof_expr, TAG_eof_expr +cs_expr, TAG_cs_expr +top_expr, TAG_top_expr +stack_expr, TAG_stack_expr +act_expr, TAG_act_expr +tokstart_expr, TAG_tokstart_expr +tokend_expr, TAG_tokend_expr +data_expr, TAG_data_expr +prepush, TAG_prepush +postpop, TAG_postpop +eof_t, TAG_eof_t |