diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2014-10-13 19:14:30 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2014-10-13 19:14:30 +0000 |
commit | eafd7a3974e8605fd02794269db6114a3446e016 (patch) | |
tree | 064737b35dbe10f2995753ead92f95bac30ba048 /test | |
download | ragel-tarball-eafd7a3974e8605fd02794269db6114a3446e016.tar.gz |
ragel-6.9ragel-6.9
Diffstat (limited to 'test')
102 files changed, 17867 insertions, 0 deletions
diff --git a/test/Makefile.am b/test/Makefile.am new file mode 100644 index 0000000..e27bd5b --- /dev/null +++ b/test/Makefile.am @@ -0,0 +1,46 @@ +# +# Copyright 2002-2009 Adrian Thurston <thurston@complang.org> +# + +# This file is part of Ragel. +# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +TESTS = runtests + +EXTRA_DIST = \ + atoi1.rl clang2.rl cond7.rl element3.rl erract8.rl forder3.rl java1.rl \ + range.rl scan3.rl xml.rl atoi2.rl clang3.rl cppscan1.rl eofact.rl \ + erract9.rl gotocallret1.rl java2.rl recdescent1.rl scan4.rl atoi3.rl \ + clang4.rl cppscan2.rl erract1.rl export1.rl gotocallret2.rl keller1.rl \ + recdescent2.rl stateact1.rl awkemu.rl cond1.rl cppscan3.rl erract2.rl \ + export2.rl high1.rl lmgoto.rl recdescent3.rl statechart1.rl builtin.rl \ + cond2.rl cppscan4.rl erract3.rl export3.rl high2.rl mailbox1.rl \ + repetition.rl strings1.rl call1.rl cond3.rl cppscan5.rl erract4.rl \ + export4.rl high3.rl mailbox2.rl rlscan.rl strings2.rl call2.rl cond4.rl \ + cppscan6.rl erract5.rl fnext1.rl import1.rl mailbox3.rl ruby1.rl \ + tokstart1.rl call3.rl cond5.rl element1.rl erract6.rl forder1.rl \ + include1.rl minimize1.rl scan1.rl union.rl clang1.rl cond6.rl \ + element2.rl erract7.rl forder2.rl include2.rl patact.rl scan2.rl \ + xmlcommon.rl langtrans_c.sh langtrans_csharp.sh langtrans_d.sh \ + langtrans_java.sh 
langtrans_ruby.sh checkeofact.txl \ + langtrans_csharp.txl langtrans_c.txl langtrans_d.txl langtrans_java.txl \ + langtrans_ruby.txl testcase.txl cppscan1.h eofact.h mailbox1.h strings2.h + +CLEANFILES = \ + *.c *.cpp *.m *.d *.java *.bin *.class *.exp \ + *.out *_c.rl *_d.rl *_java.rl *_ruby.rl *_csharp.rl *.cs \ + *_go.rl *.go *.exe diff --git a/test/Makefile.in b/test/Makefile.in new file mode 100644 index 0000000..77c3a38 --- /dev/null +++ b/test/Makefile.in @@ -0,0 +1,818 @@ +# Makefile.in generated by automake 1.14.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2013 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright 2002-2009 Adrian Thurston <thurston@complang.org> +# + +# This file is part of Ragel. +# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +VPATH = @srcdir@ +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL 
= : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +subdir = test +DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(srcdir)/runtests.in $(top_srcdir)/test-driver README +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.in +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/ragel/config.h +CONFIG_CLEAN_FILES = runtests +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red='[0;31m'; \ + grn='[0;32m'; \ + lgn='[1;32m'; \ + blu='[1;34m'; \ + mgn='[0;35m'; \ + brg='[1m'; \ + std='[m'; \ + fi; \ +} +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 
's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +RECHECK_LOGS = $(TEST_LOGS) +AM_RECURSIVE_TARGETS = check recheck +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EXEEXT = @EXEEXT@ +FIG2DEV = @FIG2DEV@ +GDC = @GDC@ +GMCS = @GMCS@ +GOBIN = @GOBIN@ +GOBJC = @GOBJC@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVAC = @JAVAC@ +KELBT = @KELBT@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = 
@PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PDFLATEX = @PDFLATEX@ +PUBDATE = @PUBDATE@ +RAGEL = @RAGEL@ +RANLIB = @RANLIB@ +RUBY = @RUBY@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +TXL = @TXL@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build_alias = @build_alias@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host_alias = @host_alias@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +TESTS = runtests +EXTRA_DIST = \ + atoi1.rl clang2.rl cond7.rl element3.rl erract8.rl forder3.rl java1.rl \ + range.rl scan3.rl xml.rl atoi2.rl clang3.rl cppscan1.rl eofact.rl \ + erract9.rl gotocallret1.rl java2.rl recdescent1.rl scan4.rl atoi3.rl \ + clang4.rl cppscan2.rl erract1.rl export1.rl gotocallret2.rl keller1.rl \ + recdescent2.rl stateact1.rl awkemu.rl cond1.rl cppscan3.rl erract2.rl \ + export2.rl high1.rl lmgoto.rl recdescent3.rl statechart1.rl builtin.rl \ + cond2.rl cppscan4.rl erract3.rl export3.rl high2.rl mailbox1.rl \ + repetition.rl strings1.rl call1.rl cond3.rl cppscan5.rl erract4.rl \ + 
export4.rl high3.rl mailbox2.rl rlscan.rl strings2.rl call2.rl cond4.rl \ + cppscan6.rl erract5.rl fnext1.rl import1.rl mailbox3.rl ruby1.rl \ + tokstart1.rl call3.rl cond5.rl element1.rl erract6.rl forder1.rl \ + include1.rl minimize1.rl scan1.rl union.rl clang1.rl cond6.rl \ + element2.rl erract7.rl forder2.rl include2.rl patact.rl scan2.rl \ + xmlcommon.rl langtrans_c.sh langtrans_csharp.sh langtrans_d.sh \ + langtrans_java.sh langtrans_ruby.sh checkeofact.txl \ + langtrans_csharp.txl langtrans_c.txl langtrans_d.txl langtrans_java.txl \ + langtrans_ruby.txl testcase.txl cppscan1.h eofact.h mailbox1.h strings2.h + +CLEANFILES = \ + *.c *.cpp *.m *.d *.java *.bin *.class *.exp \ + *.out *_c.rl *_d.rl *_java.rl *_ruby.rl *_csharp.rl *.cs \ + *_go.rl *.go *.exe + +all: all-am + +.SUFFIXES: +.SUFFIXES: .log .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign test/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign test/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +runtests: $(top_builddir)/config.status $(srcdir)/runtests.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + else \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; \ + result_count () \ + { \ 
+ if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + 
log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +runtests.log: runtests + @p='runtests'; \ + b='runtests'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files 
in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-am +all-am: Makefile +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: check-am install-am install-strip + +.PHONY: all all-am check check-TESTS check-am clean clean-generic \ + cscopelist-am ctags-am distclean distclean-generic distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ + pdf-am ps ps-am recheck tags-am uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/test/README b/test/README new file mode 100644 index 0000000..beb0c03 --- /dev/null +++ b/test/README @@ -0,0 +1,13 @@ + +The test suite now depends on TXL. Since the trend in Ragel is towards +independence of the host-language, tests are now being written in a fictional +mini-language designed for the purpose of testing ragel. The host language +test-cases are then generated using a TXL transformation. This allows one test +case to be run against all host languages in addition to all code generation +styles. + +TXL is not open source, but a free download is available from the homepage. + +http://www.txl.ca/ + +-Adrian diff --git a/test/atoi1.rl b/test/atoi1.rl new file mode 100644 index 0000000..013348e --- /dev/null +++ b/test/atoi1.rl @@ -0,0 +1,69 @@ +/* + * @LANG: indep + */ +bool neg; +int val; +%% +val = 0; +neg = false; +%%{ + machine AtoI; + + action begin { + neg = false; + val = 0; + } + + action see_neg { + neg = true; + } + + action add_digit { + val = val * 10 + <int>(fc - 48); + } + + action finish { + if ( neg ) { + val = -1 * val; + } + } + action print { + printi val; + prints "\n"; + } + + atoi = ( + ('-'@see_neg | '+')? (digit @add_digit)+ + ) >begin %finish; + + main := atoi '\n' @print; +}%% +/* _____INPUT_____ +"1\n" +"12\n" +"222222\n" +"+2123\n" +"213 3213\n" +"-12321\n" +"--123\n" +"-99\n" +" -3000\n" +_____INPUT_____ */ + +/* _____OUTPUT_____ +1 +ACCEPT +12 +ACCEPT +222222 +ACCEPT +2123 +ACCEPT +FAIL +-12321 +ACCEPT +FAIL +-99 +ACCEPT +FAIL +_____OUTPUT_____ */ diff --git a/test/atoi2.rl b/test/atoi2.rl new file mode 100644 index 0000000..9f17c69 --- /dev/null +++ b/test/atoi2.rl @@ -0,0 +1,81 @@ +/* + * @LANG: indep + * This implementes an atoi machine using the statechart paradigm. 
+ */ +bool neg; +int val; +%% +val = 0; +neg = false; +%%{ + machine StateChart; + + action begin { + neg = false; + val = 0; + } + + action see_neg { + neg = true; + } + + action add_digit { + val = val * 10 + <int>(fc - 48); + } + + action finish { + if ( neg ) + val = -1 * val; + } + + atoi = ( + start: ( + '-' @see_neg ->om_num | + '+' ->om_num | + [0-9] @add_digit ->more_nums + ), + + # One or more nums. + om_num: ( + [0-9] @add_digit ->more_nums + ), + + # Zero ore more nums. + more_nums: ( + [0-9] @add_digit ->more_nums | + '' -> final + ) + ) >begin %finish; + + action oneof { printi val; prints "\n"; } + main := ( atoi '\n' @oneof )*; +}%% +/* _____INPUT_____ +"1\n" +"12\n" +"222222\n" +"+2123\n" +"213 3213\n" +"-12321\n" +"--123\n" +"-99\n" +" -3000\n" +_____INPUT_____ */ + +/* _____OUTPUT_____ +1 +ACCEPT +12 +ACCEPT +222222 +ACCEPT +2123 +ACCEPT +FAIL +-12321 +ACCEPT +FAIL +-99 +ACCEPT +FAIL +_____OUTPUT_____ */ diff --git a/test/atoi3.rl b/test/atoi3.rl new file mode 100644 index 0000000..fcd4a41 --- /dev/null +++ b/test/atoi3.rl @@ -0,0 +1,75 @@ +# +# @LANG: ruby +# + +%%{ + machine atoi3; + action begin { + neg = false; + val = 0; + } + action see_neg { + neg = true; + } + action add_digit { + val = val * 10 + (fc - "0"[0]); + } + action finish { + val = -1 * val if neg + } + action print { + puts val; + } + atoi = (('-' @ see_neg | '+') ? 
(digit @ add_digit) +) > begin % finish; + main := atoi '\n' @ print; +}%% + +%% write data; + +def run_machine( data ) + p = 0; + pe = data.length + cs = 0 + val = 0; + neg = false; + + %% write init; + %% write exec; + if cs >= atoi3_first_final + puts "ACCEPT" + else + puts "FAIL" + end +end + +inp = [ + "1\n", + "12\n", + "222222\n", + "+2123\n", + "213 3213\n", + "-12321\n", + "--123\n", + "-99\n", + " -3000\n", +] + +inp.each { |str| run_machine(str) } + +=begin _____OUTPUT_____ +1 +ACCEPT +12 +ACCEPT +222222 +ACCEPT +2123 +ACCEPT +FAIL +-12321 +ACCEPT +FAIL +-99 +ACCEPT +FAIL +=end _____OUTPUT_____ diff --git a/test/awkemu.rl b/test/awkemu.rl new file mode 100644 index 0000000..343f3e6 --- /dev/null +++ b/test/awkemu.rl @@ -0,0 +1,155 @@ +/* + * @LANG: c + */ + +/* + * Emulate the basic parser of the awk program. Breaks lines up into + * words and prints the words. + */ + +#include <stdio.h> +#include <string.h> + +#define LINEBUF 2048 +static char lineBuf[LINEBUF]; +static char blineBuf[LINEBUF]; +static int lineLen; +static int blineLen; +static int words; + +void finishLine(); + +struct awkemu +{ + int cs; +}; + +%%{ + machine awkemu; + + variable cs fsm->cs; + + # Starts a line. Will initialize all the data necessary for capturing the line. + action startline { + lineLen = 0; + blineLen = 0; + words = 0; + } + + # Will be executed on every character seen in a word. Captures the word + # to the broken up line buffer. + action wordchar { + blineBuf[blineLen++] = fc; + } + + # Terminate a word. Adds the null after the word and increments the word count + # for the line. + action termword { + blineBuf[blineLen++] = 0; + words += 1; + } + + # Will be executed on every character seen in a line (not including + # the newline itself). + action linechar { + lineBuf[lineLen++] = fc; + } + + # This section of the machine deals with breaking up lines into fields. + # Lines are separated by whitespace and put in an array of words. + + # Words in a line.
+ word = (extend - [ \t\n])+; + + # The whitespace separating words in a line. + whitespace = [ \t]; + + # The components in a line to break up. Either a word or a single char of + # whitespace. On the word capture characters. + blineElements = word $wordchar %termword | whitespace; + + # Star the break line elements. Just be careful to decrement the leaving + # priority as we don't want multiple character identifiers to be treated as + # multiple single char identifiers. + breakLine = ( blineElements $1 %0 )* . '\n'; + + # This machine lets us capture entire lines. We do it separate from the words + # in a line. + bufLine = (extend - '\n')* $linechar %{ finishLine(); } . '\n'; + + # A line can then consist of the machine that will break up the line into + # words and a machine that will buffer the entire line. + line = ( breakLine | bufLine ) > startline; + + # Any number of lines. + main := line*; +}%% + +void finishLine() +{ + int i; + char *pword = blineBuf; + lineBuf[lineLen] = 0; + printf("endline(%i): %s\n", words, lineBuf ); + for ( i = 0; i < words; i++ ) { + printf(" word: %s\n", pword ); + pword += strlen(pword) + 1; + } +} + +%% write data; + +void awkemu_init( struct awkemu *fsm ) +{ + %% write init; +} + +void awkemu_execute( struct awkemu *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + %% write exec; +} + +int awkemu_finish( struct awkemu *fsm ) +{ + if ( fsm->cs == awkemu_error ) + return -1; + if ( fsm->cs >= awkemu_first_final ) + return 1; + return 0; +} + +#include <stdio.h> +#define BUFSIZE 2048 + +struct awkemu fsm; +char buf[BUFSIZE]; + +void test( char *buf ) +{ + int len = strlen( buf ); + awkemu_init( &fsm ); + awkemu_execute( &fsm, buf, len ); + if ( awkemu_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( "" ); + test( "one line with no newline" ); + test( "one line\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +FAIL +endline(2): 
one line + word: one + word: line +ACCEPT +#endif diff --git a/test/builtin.rl b/test/builtin.rl new file mode 100644 index 0000000..816b441 --- /dev/null +++ b/test/builtin.rl @@ -0,0 +1,1209 @@ +/* + * @LANG: c + */ + +#include <stdio.h> + +void alph(const char *type) +{ + printf("%s\n", type); +} + +struct builtin +{ + int cs; +}; + +%%{ + machine builtin; + alphtype unsigned int; + variable cs fsm->cs; + + main := ( + any @{alph("any");} | + ascii @{alph("ascii");} | + extend @{alph("extend");} | + alpha @{alph("alpha");} | + digit @{alph("digit");} | + alnum @{alph("alnum");} | + lower @{alph("lower");} | + upper @{alph("upper");} | + cntrl @{alph("cntrl");} | + graph @{alph("graph");} | + print @{alph("print");} | + punct @{alph("punct");} | + space @{alph("space");} | + xdigit @{alph("xdigit");} + )*; +}%% + +%% write data; + +void builtin_init( struct builtin *fsm ) +{ + %% write init; +} + +void builtin_execute( struct builtin *fsm, const unsigned int *data, int len ) +{ + const unsigned int *p = data; + const unsigned int *pe = data+len; + %% write exec; +} + +int builtin_finish( struct builtin *fsm ) +{ + if ( fsm->cs == builtin_error ) + return -1; + else if ( fsm->cs >= builtin_first_final ) + return 1; + return 0; +} + +#include <stdio.h> +#define BUFSIZE 2048 + +struct builtin fsm; +char buf[BUFSIZE]; +unsigned int i; + +int test( const unsigned int *data, int len ) +{ + builtin_init( &fsm ); + builtin_execute( &fsm, data, len ); + if ( builtin_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); + return 0; +} + +#define DLEN 258 +unsigned int data[DLEN] = { + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, + 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 
94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, + 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, + 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, + 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, + 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, + 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, + 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256 +}; + +int main() +{ + test( data, DLEN ); + return 0; +} + +#ifdef _____OUTPUT_____ +any +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +space +any +ascii +extend +cntrl +space +any +ascii +extend +cntrl +space +any +ascii +extend +cntrl +space +any +ascii +extend +cntrl +space +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +cntrl +any +ascii +extend +print +space +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend 
+graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +digit +alnum +graph +print +xdigit +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +alpha +alnum +upper +graph +print +xdigit +any +ascii +extend +alpha +alnum +upper +graph +print +xdigit +any +ascii +extend +alpha +alnum +upper +graph +print +xdigit +any +ascii +extend +alpha +alnum +upper +graph +print +xdigit +any +ascii +extend +alpha +alnum +upper +graph +print +xdigit +any +ascii +extend +alpha +alnum +upper +graph +print +xdigit +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha 
+alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +alpha +alnum +upper +graph +print +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +alpha +alnum +lower +graph +print +xdigit +any +ascii +extend +alpha +alnum +lower +graph +print +xdigit +any +ascii +extend +alpha +alnum +lower +graph +print +xdigit +any +ascii +extend +alpha +alnum +lower +graph +print +xdigit +any +ascii +extend +alpha +alnum +lower +graph +print +xdigit +any +ascii +extend +alpha +alnum +lower +graph +print +xdigit +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha 
+alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +alpha +alnum +lower +graph +print +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +graph +print +punct +any +ascii +extend +cntrl +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any 
+extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +extend +any +ACCEPT +#endif diff --git a/test/call1.rl b/test/call1.rl new file mode 100644 index 0000000..ddd552a --- /dev/null +++ b/test/call1.rl @@ -0,0 +1,101 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +int num = 0; + +struct test +{ + int cs, top, stack[32]; +}; + +%%{ + machine test; + access fsm->; + + action check_num { + if ( num & 1 ) + fcall *fentry(odd); + else + fcall even; + } + + # Test call and return functionality. + even := 'even' any @{fhold; fret;}; + odd := 'odd' any @{fhold; fret;}; + num = [0-9]+ ${ num = num * 10 + (fc - '0'); }; + even_odd = num ' ' @check_num "\n"; + + # Test calls in out actions. 
+ fail := !(any*); + out_acts = 'OA ok\n' | + 'OA error1\n' | + 'OA error2\n'; + + main := even_odd | out_acts; +}%% + +%% write data; + +void test_init( struct test *fsm ) +{ + num = 0; + %% write init; +} + +void test_execute( struct test *fsm, const char *data, int len ) +{ + const char *p = data; + const char *pe = data+len; + + %% write exec; +} + +int test_finish( struct test *fsm ) +{ + if ( fsm->cs == test_error ) + return -1; + if ( fsm->cs >= test_first_final ) + return 1; + return 0; +} + +#define BUFSIZE 1024 + +void test( char *buf ) +{ + struct test test; + test_init( &test ); + test_execute( &test, buf, strlen(buf) ); + if ( test_finish( &test ) > 0 ) + printf( "ACCEPT\n" ); + else + printf( "FAIL\n" ); +} + +int main() +{ + test( "78 even\n" ); + test( "89 odd\n" ); + test( "1 even\n" ); + test( "0 odd\n" ); + test( "OA ok\n" ); + test( "OA error1\n" ); + test( "OA error2\n" ); + + return 0; +} + + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +FAIL +FAIL +ACCEPT +ACCEPT +ACCEPT +#endif diff --git a/test/call2.rl b/test/call2.rl new file mode 100644 index 0000000..a553855 --- /dev/null +++ b/test/call2.rl @@ -0,0 +1,116 @@ +/* + * @LANG: c++ + */ + +#include <stdio.h> +#include <string.h> + +int num = 0; + +struct CallTest +{ + int cs, top, stack[32]; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + void init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + void execute( const char *data, int len ); + + // Indicate that there is no more data. 
Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +%%{ + machine CallTest; + + action check_num { + if ( num & 1 ) + fcall *fentry(odd); + else + fcall even; + } + + # Test call and return functionality. + even := 'even' any @{fhold; fret;}; + odd := 'odd' any @{fhold; fret;}; + num = [0-9]+ ${ num = num * 10 + (fc - '0'); }; + even_odd = num ' ' @check_num "\n"; + + # Test calls in out actions. + fail := !(any*); + out_acts = 'OA ok\n' | + 'OA error1\n' | + 'OA error2\n'; + + main := even_odd | out_acts; +}%% + +%% write data; + +void CallTest::init( ) +{ + num = 0; + %% write init; +} + +void CallTest::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data+len; + + %% write exec; +} + +int CallTest::finish( ) +{ + if ( this->cs == CallTest_error ) + return -1; + if ( this->cs >= CallTest_first_final ) + return 1; + return 0; +} + +#define BUFSIZE 1024 + +void test( const char *buf ) +{ + CallTest test; + + test.init(); + test.execute( buf, strlen(buf) ); + if ( test.finish() > 0 ) + printf( "ACCEPT\n" ); + else + printf( "FAIL\n" ); +} + +int main() +{ + test( "78 even\n" ); + test( "89 odd\n" ); + test( "1 even\n" ); + test( "0 odd\n" ); + test( "OA ok\n" ); + test( "OA error1\n" ); + test( "OA error2\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +FAIL +FAIL +ACCEPT +ACCEPT +ACCEPT +#endif diff --git a/test/call3.rl b/test/call3.rl new file mode 100644 index 0000000..b19b96c --- /dev/null +++ b/test/call3.rl @@ -0,0 +1,122 @@ +/* + * @LANG: obj-c + */ + +#include <stdio.h> +#include <string.h> +#include <objc/Object.h> + + +int num = 0; + +@interface CallTest : Object +{ +@public + /* State machine operation data. */ + int cs, top, stack[32]; +}; + +// Initialize the machine. Invokes any init statement blocks. 
Returns 0 +// if the machine begins in a non-accepting state and 1 if the machine +// begins in an accepting state. +- (void) initFsm; + +// Execute the machine on a block of data. Returns -1 if after processing +// the data, the machine is in the error state and can never accept, 0 if +// the machine is in a non-accepting state and 1 if the machine is in an +// accepting state. +- (void) executeWithData:(const char *)data len:(int)len; + +// Indicate that there is no more data. Returns -1 if the machine finishes +// in the error state and does not accept, 0 if the machine finishes +// in any other non-accepting state and 1 if the machine finishes in an +// accepting state. +- (int) finish; + +@end + +@implementation CallTest + +%%{ + machine CallTest; + + action check_num { + if ( num & 1 ) + fcall odd; + else + fcall even; + } + + # Test call and return functionality. + even := 'even' any @{fhold; fret;}; + odd := 'odd' any @{fhold; fret;}; + num = [0-9]+ ${ num = num * 10 + (fc - '0'); }; + even_odd = num ' ' @check_num "\n"; + + # Test calls in out actions. + fail := !(any*); + out_acts = 'OA ok\n' | + 'OA error1\n' | + 'OA error2\n'; + + main := even_odd | out_acts; +}%% + +%% write data; + +- (void) initFsm; +{ + num = 0; + %% write init; +} + +- (void) executeWithData:(const char *)data len:(int)len; +{ + const char *p = data; + const char *pe = data + len; + %% write exec; +} + +- (int) finish; +{ + if ( cs == CallTest_error ) + return -1; + return ( cs >= CallTest_first_final ) ? 
1 : 0; +} + +@end + +#define BUFSIZE 1024 + +void test( char *buf ) +{ + CallTest *test = [[CallTest alloc] init]; + [test initFsm]; + [test executeWithData:buf len:strlen(buf)]; + if ( [test finish] > 0 ) + printf( "ACCEPT\n" ); + else + printf( "FAIL\n" ); +} + +int main() +{ + test( "78 even\n" ); + test( "89 odd\n" ); + test( "1 even\n" ); + test( "0 odd\n" ); + test( "OA ok\n" ); + test( "OA error1\n" ); + test( "OA error2\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +FAIL +FAIL +ACCEPT +ACCEPT +ACCEPT +#endif diff --git a/test/checkeofact.txl b/test/checkeofact.txl new file mode 100644 index 0000000..8189013 --- /dev/null +++ b/test/checkeofact.txl @@ -0,0 +1,95 @@ +include "testcase.txl" + +define program + [lang_indep] + | 'yes + | 'no +end define + +rule findEof1 + match [machine_expr_item] + '>/ +end rule + +rule findEof2 + match [machine_expr_item] + '</ +end rule + +rule findEof3 + match [machine_expr_item] + '$/ +end rule + +rule findEof4 + match [machine_expr_item] + '%/ +end rule + +rule findEof5 + match [machine_expr_item] + '@/ +end rule + +rule findEof6 + match [machine_expr_item] + '<>/ +end rule + +rule findEof7 + match [repeat machine_expr_item] + '> 'eof _ [repeat machine_expr_item] +end rule + +rule findEof8 + match [repeat machine_expr_item] + '< 'eof _ [repeat machine_expr_item] +end rule + +rule findEof9 + match [repeat machine_expr_item] + '$ 'eof _ [repeat machine_expr_item] +end rule + +rule findEof10 + match [repeat machine_expr_item] + '% 'eof _ [repeat machine_expr_item] +end rule + +rule findEof11 + match [repeat machine_expr_item] + '@ 'eof _ [repeat machine_expr_item] +end rule + +rule findEof12 + match [repeat machine_expr_item] + '<> 'eof _ [repeat machine_expr_item] +end rule + +rule findScanner + match [machine_expr_item] + '|* _ [repeat scanner_item] '*| +end rule + +function findEof P [program] + replace [program] + _ [program] + where + P + [findEof1] [findEof2] [findEof3] + [findEof4] [findEof5] 
[findEof6] + [findEof7] [findEof8] [findEof9] + [findEof10] [findEof11] [findEof12] + [findScanner] + by + 'yes +end function + +function main + replace [program] + P [program] + construct NewP [program] + 'no + by + NewP [findEof P] +end function diff --git a/test/clang1.rl b/test/clang1.rl new file mode 100644 index 0000000..85532c6 --- /dev/null +++ b/test/clang1.rl @@ -0,0 +1,283 @@ +/* + * @LANG: c + * A mini C-like language scanner. + */ + +#include <stdio.h> +#include <string.h> +#define IDENT_BUFLEN 256 + +%%{ + machine clang; + + # Function to buffer a character. + action bufChar { + if ( identLen < IDENT_BUFLEN ) { + identBuf[identLen] = fc; + identLen += 1; + } + } + + # Function to clear the buffer. + action clearBuf { + identLen = 0; + } + + # Functions to dump tokens as they are matched. + action ident { + identBuf[identLen] = 0; + printf("ident(%i): %s\n", curLine, identBuf); + } + action literal { + identBuf[identLen] = 0; + printf("literal(%i): %s\n", curLine, identBuf); + } + action float { + identBuf[identLen] = 0; + printf("float(%i): %s\n", curLine, identBuf); + } + action int { + identBuf[identLen] = 0; + printf("int(%i): %s\n", curLine, identBuf); + } + action hex { + identBuf[identLen] = 0; + printf("hex(%i): 0x%s\n", curLine, identBuf); + } + action symbol { + identBuf[identLen] = 0; + printf("symbol(%i): %s\n", curLine, identBuf); + } + + # Alphanumeric characters or underscore. + alnumu = alnum | '_'; + + # Alpha characters or underscore. + alphau = alpha | '_'; + + # Symbols. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving dump the symbol. + symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol; + + # Identifier. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving, dump the identifier. + ident = (alphau . alnumu*) >clearBuf $bufChar %ident; + + # Match single characters inside literal strings. Or match + # an escape sequence. Buffers the character matched.
+ sliteralChar = + ( extend - ['\\] ) @bufChar | + ( '\\' . extend @bufChar ); + dliteralChar = + ( extend - ["\\] ) @bufChar | + ( '\\' . extend @bufChar ); + + # Single quote and double quote literals. At the start clear + # the buffer. Upon leaving dump the literal. + sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal; + dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal; + literal = sliteral | dliteral; + + # Whitespace is standard ws, newlines and control codes. + whitespace = any - 0x21..0x7e; + + # Describe both c style comments and c++ style comments. The + # priority bump on the terminator of the comments brings us + # out of the extend* which matches everything. + ccComment = '//' . extend* $0 . '\n' @1; + cComment = '/*' . extend* $0 . '*/' @1; + + # Match an integer. We don't bother clearing the buf or filling it. + # The float machine overlaps with int and it will do it. + int = digit+ %int; + + # Match a float. Upon entering the machine clear the buf, buffer + # characters on every trans and dump the float upon leaving. + float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float; + + # Match a hex. Upon entering the hex part, clear the buf, buffer characters + # on every trans and dump the hex on leaving transitions. + hex = '0x' . xdigit+ >clearBuf $bufChar %hex; + + # Or together all the language elements. + fin = ( ccComment | + cComment | + symbol | + ident | + literal | + whitespace | + int | + float | + hex ); + + # Star the language elements. It is critical in this type of application + # that we decrease the priority of out transitions before doing so. This + # is so that when we see 'aa' we stay in the fin machine to match an ident + # of length two and not wrap around to the front to match two idents of + # length one. + clang_main = ( fin $1 %0 )*; + + # This machine matches everything, taking note of newlines.
+ newline = ( any | '\n' @{ curLine += 1; } )*; + + # The final fsm is the lexer intersected with the newline machine which + # will count lines for us. Since the newline machine accepts everything, + # the strings accepted are governed by the clang_main machine, onto which + # the newline machine overlays line counting. + main := clang_main & newline; +}%% + +#include <stdio.h> + +%% write data noerror; + + +char data[] = + "/*\n" + " * Copyright\n" + " */\n" + "\n" + "/* Aapl.\n" + " */\n" + "\n" + "#define _AAPL_RESIZE_H\n" + "\n" + "#include <assert.h>\n" + "\n" + "#ifdef AAPL_NAMESPACE\n" + "namespace Aapl {\n" + "#endif\n" + "#define LIN_DEFAULT_STEP 256\n" + "#define EXPN_UP( existing, needed ) \\\n" + " need > eng ? (ned<<1) : eing\n" + " \n" + "\n" + "/*@}*/\n" + "#undef EXPN_UP\n" + "#ifdef AAPL_NAMESPACE\n" + "#endif /* _AAPL_RESIZE_H */\n"; + +void test( char *buf ) +{ + int len = strlen( buf ); + char *p = buf, *pe = buf + len; + char *eof = pe; + char identBuf[IDENT_BUFLEN+1]; + int identLen; + int curLine; + int cs; + + identLen = 0; + curLine = 1; + + %% write init; + %% write exec; + + if ( cs >= clang_first_final ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( + "999 0xaAFF99 99.99 /*\n" + "*/ 'lksdj' //\n" + "\"\n" + "\n" + "literal\n" + "\n" + "\n" + "\"0x00aba foobardd.ddsf 0x0.9\n" ); + test( + "wordwithnum00asdf\n" + "000wordfollowsnum,makes new symbol\n" + "\n" + "finishing early /* unfinished ...\n" ); + test( data ); + return 0; +} + +#ifdef _____OUTPUT_____ +int(1): 999 +hex(1): 0xaAFF99 +float(1): 99.99 +literal(2): lksdj +literal(8): + +literal + + + +hex(8): 0x00aba +ident(8): foobardd +symbol(8): . +ident(8): ddsf +hex(8): 0x0 +symbol(8): .
+int(8): 9 +ACCEPT +ident(1): wordwithnum00asdf +int(2): 000 +ident(2): wordfollowsnum +symbol(2): , +ident(2): makes +ident(2): new +ident(2): symbol +ident(4): finishing +ident(4): early +FAIL +symbol(8): # +ident(8): define +ident(8): _AAPL_RESIZE_H +symbol(10): # +ident(10): include +symbol(10): < +ident(10): assert +symbol(10): . +ident(10): h +symbol(10): > +symbol(12): # +ident(12): ifdef +ident(12): AAPL_NAMESPACE +ident(13): namespace +ident(13): Aapl +symbol(13): { +symbol(14): # +ident(14): endif +symbol(15): # +ident(15): define +ident(15): LIN_DEFAULT_STEP +int(15): 256 +symbol(16): # +ident(16): define +ident(16): EXPN_UP +symbol(16): ( +ident(16): existing +symbol(16): , +ident(16): needed +symbol(16): ) +symbol(16): \ +ident(17): need +symbol(17): > +ident(17): eng +symbol(17): ? +symbol(17): ( +ident(17): ned +symbol(17): < +symbol(17): < +int(17): 1 +symbol(17): ) +symbol(17): : +ident(17): eing +symbol(21): # +ident(21): undef +ident(21): EXPN_UP +symbol(22): # +ident(22): ifdef +ident(22): AAPL_NAMESPACE +symbol(23): # +ident(23): endif +ACCEPT +#endif diff --git a/test/clang2.rl b/test/clang2.rl new file mode 100644 index 0000000..3b22e4b --- /dev/null +++ b/test/clang2.rl @@ -0,0 +1,324 @@ +/* + * @LANG: obj-c + * A mini C-like language scanner. + */ + +#include <stdio.h> +#include <objc/Object.h> +#include <string.h> + +#define IDENT_BUFLEN 256 + +@interface Clang : Object +{ +@public + /* State machine operation data. */ + int cs; + + /* Parsing data. */ + char identBuf[IDENT_BUFLEN+1]; + int identLen; + int curLine; +}; + +- (void) initFsm; +- (void) executeWithData:(const char *)data len:(int)len; +- (int) finish; + +@end + +%%{ + machine Clang; + + # Function to buffer a character. + action bufChar { + if ( identLen < IDENT_BUFLEN ) { + identBuf[identLen] = fc; + identLen += 1; + } + } + + # Function to clear the buffer. + action clearBuf { + identLen = 0; + } + + # Functions to dump tokens as they are matched. 
+ action ident { + identBuf[identLen] = 0; + printf("ident(%i): %s\n", curLine, identBuf); + } + action literal { + identBuf[identLen] = 0; + printf("literal(%i): %s\n", curLine, identBuf); + } + action float { + identBuf[identLen] = 0; + printf("float(%i): %s\n", curLine, identBuf); + } + action int { + identBuf[identLen] = 0; + printf("int(%i): %s\n", curLine, identBuf); + } + action hex { + identBuf[identLen] = 0; + printf("hex(%i): 0x%s\n", curLine, identBuf); + } + action symbol { + identBuf[identLen] = 0; + printf("symbol(%i): %s\n", curLine, identBuf); + } + + # Alphanumeric characters or underscore. + alnumu = alnum | '_'; + + # Alpha characters or underscore. + alphau = alpha | '_'; + + # Symbols. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving dump the symbol. + symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol; + + # Identifier. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving, dump the identifier. + ident = (alphau . alnumu*) >clearBuf $bufChar %ident; + + # Match single characters inside literal strings. Or match + # an escape sequence. Buffers the character matched. + sliteralChar = + ( extend - ['\\] ) @bufChar | + ( '\\' . extend @bufChar ); + dliteralChar = + ( extend - ["\\] ) @bufChar | + ( '\\' . extend @bufChar ); + + # Single quote and double quote literals. At the start clear + # the buffer. Upon leaving dump the literal. + sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal; + dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal; + literal = sliteral | dliteral; + + # Whitespace is standard ws, newlines and control codes. + whitespace = any - 0x21..0x7e; + + # Describe both c style comments and c++ style comments. The + # priority bump on the terminator of the comments brings us + # out of the extend* which matches everything. + ccComment = '//' . extend* $0 . '\n' @1; + cComment = '/*' . extend* $0 . '*/' @1; + + # Match an integer.
We don't bother clearing the buf or filling it. + # The float machine overlaps with int and it will do it. + int = digit+ %int; + + # Match a float. Upon entering the machine clear the buf, buffer + # characters on every trans and dump the float upon leaving. + float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float; + + # Match a hex. Upon entering the hex part, clear the buf, buffer characters + # on every trans and dump the hex on leaving transitions. + hex = '0x' . xdigit+ >clearBuf $bufChar %hex; + + # Or together all the language elements. + fin = ( ccComment | + cComment | + symbol | + ident | + literal | + whitespace | + int | + float | + hex ); + + # Star the language elements. It is critical in this type of application + # that we decrease the priority of out transitions before doing so. This + # is so that when we see 'aa' we stay in the fin machine to match an ident + # of length two and not wrap around to the front to match two idents of + # length one. + clang_main = ( fin $1 %0 )*; + + # This machine matches everything, taking note of newlines. + newline = ( any | '\n' @{ curLine += 1; } )*; + + # The final fsm is the lexer intersected with the newline machine which + # will count lines for us. Since the newline machine accepts everything, + # the strings accepted are governed by the clang_main machine, onto which + # the newline machine overlays line counting.
+ main := clang_main & newline; +}%% + +@implementation Clang + +%% write data; + +- (void) initFsm; /* Reset the buffered-token length and the line counter, then run the Ragel-generated init code. */ +{ + identLen = 0; + curLine = 1; + %% write init; +} + +- (void) executeWithData:(const char *)data len:(int)len; /* Run the machine over the len bytes at data; eof is set to pe, so the block is treated as the complete input. */ +{ + const char *p = data; + const char *pe = data + len; + const char *eof = pe; + + %% write exec; +} + +- (int) finish; /* Report the machine state: -1 for the error state, 1 for a final state, 0 otherwise. */ +{ + if ( cs == Clang_error ) + return -1; + if ( cs >= Clang_first_final ) + return 1; + return 0; +} + +@end + +#define BUFSIZE 2048 + +Clang *fsm; +char buf[BUFSIZE]; + +void test( char *buf ) /* Scan one NUL-terminated input with a newly allocated Clang and print ACCEPT or FAIL. The buf parameter shadows the global buf array. NOTE(review): the instance stored in the global fsm is not released here or in main -- confirm that this is acceptable for the test. */ +{ + int len = strlen(buf); + fsm = [[Clang alloc] init]; + [fsm initFsm]; + [fsm executeWithData:buf len:len]; + if ( [fsm finish] > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( + "999 0xaAFF99 99.99 /*\n" + "*/ 'lksdj' //\n" + "\"\n" + "\n" + "literal\n" + "\n" + "\n" + "\"0x00aba foobardd.ddsf 0x0.9\n" ); + + test( + "wordwithnum00asdf\n" + "000wordfollowsnum,makes new symbol\n" + "\n" + "finishing early /* unfinished ...\n" ); + + test( + "/*\n" + " * Copyright\n" + " */\n" + "\n" + "/* Aapl.\n" + " */\n" + "\n" + "#define _AAPL_RESIZE_H\n" + "\n" + "#include <assert.h>\n" + "\n" + "#ifdef AAPL_NAMESPACE\n" + "namespace Aapl {\n" + "#endif\n" + "#define LIN_DEFAULT_STEP 256\n" + "#define EXPN_UP( existing, needed ) \\\n" + " need > eng ? (ned<<1) : eing\n" + " \n" + "\n" + "/*@}*/\n" + "#undef EXPN_UP\n" + "#ifdef AAPL_NAMESPACE\n" + "#endif /* _AAPL_RESIZE_H */\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +int(1): 999 +hex(1): 0xaAFF99 +float(1): 99.99 +literal(2): lksdj +literal(8): + +literal + + + +hex(8): 0x00aba +ident(8): foobardd +symbol(8): . +ident(8): ddsf +hex(8): 0x0 +symbol(8): . 
+int(8): 9 +ACCEPT +ident(1): wordwithnum00asdf +int(2): 000 +ident(2): wordfollowsnum +symbol(2): , +ident(2): makes +ident(2): new +ident(2): symbol +ident(4): finishing +ident(4): early +FAIL +symbol(8): # +ident(8): define +ident(8): _AAPL_RESIZE_H +symbol(10): # +ident(10): include +symbol(10): < +ident(10): assert +symbol(10): . +ident(10): h +symbol(10): > +symbol(12): # +ident(12): ifdef +ident(12): AAPL_NAMESPACE +ident(13): namespace +ident(13): Aapl +symbol(13): { +symbol(14): # +ident(14): endif +symbol(15): # +ident(15): define +ident(15): LIN_DEFAULT_STEP +int(15): 256 +symbol(16): # +ident(16): define +ident(16): EXPN_UP +symbol(16): ( +ident(16): existing +symbol(16): , +ident(16): needed +symbol(16): ) +symbol(16): \ +ident(17): need +symbol(17): > +ident(17): eng +symbol(17): ? +symbol(17): ( +ident(17): ned +symbol(17): < +symbol(17): < +int(17): 1 +symbol(17): ) +symbol(17): : +ident(17): eing +symbol(21): # +ident(21): undef +ident(21): EXPN_UP +symbol(22): # +ident(22): ifdef +ident(22): AAPL_NAMESPACE +symbol(23): # +ident(23): endif +ACCEPT +#endif diff --git a/test/clang3.rl b/test/clang3.rl new file mode 100644 index 0000000..82f5eed --- /dev/null +++ b/test/clang3.rl @@ -0,0 +1,321 @@ +/* + * @LANG: d + * A mini C-like language scanner. + */ + +module clang; + +import std.c.stdio; + +char[] string(char c) +{ + char[] result = new char[2]; + result[0] = c; + result[1] = 0; + return result[0 .. 1]; +} + +class CLang +{ + /* Parsing data. */ + char[] identBuf; + int curLine; + + this() + { + } + + /* State machine operation data. */ + int cs; + + %%{ + machine clang; + + # Function to buffer a character. + action bufChar { + identBuf ~= fc; + } + + # Function to clear the buffer. + action clearBuf { + + identBuf = null; + } + + # Functions to dump tokens as they are matched. 
+ action ident { + printf("ident(%i): %.*s\n", curLine, identBuf); + } + action literal { + printf("literal(%i): %.*s\n", curLine, identBuf); + } + action float { + printf("float(%i): %.*s\n", curLine, identBuf); + } + action int { + printf("int(%i): %.*s\n", curLine, identBuf); + } + action hex { + printf("hex(%i): 0x%.*s\n", curLine, identBuf); + } + action symbol { + printf("symbol(%i): %.*s\n", curLine, identBuf); + } + + # Alpha numberic characters or underscore. + alnumu = alnum | '_'; + + # Alpha charactres or underscore. + alphau = alpha | '_'; + + # Symbols. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving dump the symbol. + symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol; + + # Identifier. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving, dump the identifier. + ident = (alphau . alnumu*) >clearBuf $bufChar %ident; + + # Match single characters inside literal strings. Or match + # an escape sequence. Buffers the charater matched. + sliteralChar = + ( extend - ['\\] ) @bufChar | + ( '\\' . extend @bufChar ); + dliteralChar = + ( extend - ["\\] ) @bufChar | + ( '\\' . extend @bufChar ); + + # Single quote and double quota literals. At the start clear + # the buffer. Upon leaving dump the literal. + sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal; + dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal; + literal = sliteral | dliteral; + + # Whitespace is standard ws, newlines and control codes. + whitespace = any - 0x21..0x7e; + + # Describe both c style comments and c++ style comments. The + # priority bump on tne terminator of the comments brings us + # out of the extend* which matches everything. + ccComment = '//' . extend* $0 . '\n' @1; + cComment = '/*' . extend* $0 . '*/' @1; + + # Match an integer. We don't bother clearing the buf or filling it. + # The float machine overlaps with int and it will do it. + int = digit+ %int; + + # Match a float. 
Upon entering the machine clear the buf, buffer + # characters on every trans and dump the float upon leaving. + float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float; + + # Match a hex. Upon entering the hex part, clear the buf, buffer characters + # on every trans and dump the hex on leaving transitions. + hex = '0x' . xdigit+ >clearBuf $bufChar %hex; + + # Or together all the language elements. + fin = ( ccComment | + cComment | + symbol | + ident | + literal | + whitespace | + int | + float | + hex ); + + # Star the language elements. It is critical in this type of application + # that we decrease the priority of out transitions before doing so. This + # is so that when we see 'aa' we stay in the fin machine to match an ident + # of length two and not wrap around to the front to match two idents of + # length one. + clang_main = ( fin $1 %0 )*; + + # This machine matches everything, taking note of newlines. + newline = ( any | '\n' @{ curLine++; } )*; + + # The final fsm is the lexer intersected with the newline machine which + # will count lines for us. Since the newline machine accepts everything, + # the strings accepted is governed by the clang_main machine, onto which + # the newline machine overlays line counting. + main := clang_main & newline; + }%% + + %% write data noprefix; + + // Initialize the machine: reset the line counter and run the + // Ragel-generated init code. Returns nothing; the acceptance status is + // reported by finish(). + void init( ) + { + curLine = 1; + %% write init; + } + + // Execute the machine on a block of data. Returns nothing; the state is + // left in cs and is reported by finish(). Because eof is set to pe, the + // block is treated as the complete input and end-of-input actions run at + // the end of this call. + void execute( char* _data, int _len ) + { + char *p = _data; + char *pe = _data + _len; + char *eof = pe; + %% write exec; + } + + // Indicate that there is no more data. 
Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ) + { + if ( cs == error ) + return -1; + if ( cs >= first_final ) + return 1; + return 0; + } +} + +static const int BUFSIZE = 1024; + +// Scan one complete input with a fresh CLang and print ACCEPT if the +// machine ends in a final state, FAIL otherwise. +void test( char buf[] ) +{ + CLang scanner = new CLang(); + scanner.init(); + scanner.execute( buf.ptr, buf.length ); + if ( scanner.finish() > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( + "999 0xaAFF99 99.99 /*\n" + "*/ 'lksdj' //\n" + "\"\n" + "\n" + "literal\n" + "\n" + "\n" + "\"0x00aba foobardd.ddsf 0x0.9\n" ); + + test( + "wordwithnum00asdf\n" + "000wordfollowsnum,makes new symbol\n" + "\n" + "finishing early /* unfinished ...\n" ); + + test( + "/*\n" + " * Copyright\n" + " */\n" + "\n" + "/* Aapl.\n" + " */\n" + "\n" + "#define _AAPL_RESIZE_H\n" + "\n" + "#include <assert.h>\n" + "\n" + "#ifdef AAPL_NAMESPACE\n" + "namespace Aapl {\n" + "#endif\n" + "#define LIN_DEFAULT_STEP 256\n" + "#define EXPN_UP( existing, needed ) \\\n" + " need > eng ? (ned<<1) : eing\n" + " \n" + "\n" + "/*@}*/\n" + "#undef EXPN_UP\n" + "#ifdef AAPL_NAMESPACE\n" + "#endif /* _AAPL_RESIZE_H */\n" ); + return 0; +} + +/+ _____OUTPUT_____ +int(1): 999 +hex(1): 0xaAFF99 +float(1): 99.99 +literal(2): lksdj +literal(8): + +literal + + + +hex(8): 0x00aba +ident(8): foobardd +symbol(8): . +ident(8): ddsf +hex(8): 0x0 +symbol(8): . +int(8): 9 +ACCEPT +ident(1): wordwithnum00asdf +int(2): 000 +ident(2): wordfollowsnum +symbol(2): , +ident(2): makes +ident(2): new +ident(2): symbol +ident(4): finishing +ident(4): early +FAIL +symbol(8): # +ident(8): define +ident(8): _AAPL_RESIZE_H +symbol(10): # +ident(10): include +symbol(10): < +ident(10): assert +symbol(10): . 
+ident(10): h +symbol(10): > +symbol(12): # +ident(12): ifdef +ident(12): AAPL_NAMESPACE +ident(13): namespace +ident(13): Aapl +symbol(13): { +symbol(14): # +ident(14): endif +symbol(15): # +ident(15): define +ident(15): LIN_DEFAULT_STEP +int(15): 256 +symbol(16): # +ident(16): define +ident(16): EXPN_UP +symbol(16): ( +ident(16): existing +symbol(16): , +ident(16): needed +symbol(16): ) +symbol(16): \ +ident(17): need +symbol(17): > +ident(17): eng +symbol(17): ? +symbol(17): ( +ident(17): ned +symbol(17): < +symbol(17): < +int(17): 1 +symbol(17): ) +symbol(17): : +ident(17): eing +symbol(21): # +ident(21): undef +ident(21): EXPN_UP +symbol(22): # +ident(22): ifdef +ident(22): AAPL_NAMESPACE +symbol(23): # +ident(23): endif +ACCEPT +++++++++++++++++/ diff --git a/test/clang4.rl b/test/clang4.rl new file mode 100644 index 0000000..c3bb399 --- /dev/null +++ b/test/clang4.rl @@ -0,0 +1,188 @@ +/* + * @LANG: indep + * @NEEDS_EOF: yes + */ + +char array[32]; +int pos; +int line; +%% +pos = 0; +line = 1; +%%{ + machine clang; + + # Function to buffer a character. + action bufChar { array[pos] = fc; pos = pos + 1; } + + # Function to clear the buffer. + action clearBuf { pos = 0; } + + # Functions to dump tokens as they are matched. 
+ action ident { + prints "ident("; + printi line; + prints ","; + printi pos; + prints "): "; + printb array; + prints "\n"; + } + action literal { + prints "literal("; + printi line; + prints ","; + printi pos; + prints "): "; + printb array; + prints "\n"; + } + action float { + prints "float("; + printi line; + prints ","; + printi pos; + prints "): "; + printb array; + prints "\n"; + } + action integer { + prints "int("; + printi line; + prints ","; + printi pos; + prints "): "; + printb array; + prints "\n"; + } + action hex { + prints "hex("; + printi line; + prints ","; + printi pos; + prints "): "; + printb array; + prints "\n"; + } + action symbol { + prints "symbol("; + printi line; + prints ","; + printi pos; + prints "): "; + printb array; + prints "\n"; + } + + # Alpha numberic characters or underscore. + alnumu = alnum | '_'; + + # Alpha charactres or underscore. + alphau = alpha | '_'; + + # Symbols. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving dump the symbol. + symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol; + + # Identifier. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving, dump the identifier. + ident = (alphau . alnumu*) >clearBuf $bufChar %ident; + + # Match single characters inside literal strings. Or match + # an escape sequence. Buffers the charater matched. + sliteralChar = + ( extend - ['\\] ) @bufChar | + ( '\\' . extend @bufChar ); + dliteralChar = + ( extend - ["\\] ) @bufChar | + ( '\\' . extend @bufChar ); + + # Single quote and double quota literals. At the start clear + # the buffer. Upon leaving dump the literal. + sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal; + dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal; + literal = sliteral | dliteral; + + # Whitespace is standard ws, newlines and control codes. + whitespace = any - 33 .. 126; + + # Describe both c style comments and c++ style comments. 
The + # priority bump on tne terminator of the comments brings us + # out of the extend* which matches everything. + ccComment = '//' . extend* $0 . '\n' @1; + cComment = '/!' . extend* $0 . '!/' @1; + + # Match an integer. We don't bother clearing the buf or filling it. + # The float machine overlaps with int and it will do it. + integer = digit+ %integer; + + # Match a float. Upon entering the machine clear the buf, buffer + # characters on every trans and dump the float upon leaving. + float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float; + + # Match a hex. Upon entering the hex part, clear the buf, buffer characters + # on every trans and dump the hex on leaving transitions. + hex = '0x' . xdigit+ >clearBuf $bufChar %hex; + + # Or together all the lanuage elements. + fin = ( ccComment | + cComment | + symbol | + ident | + literal | + whitespace | + integer | + float | + hex ); + + # Star the language elements. It is critical in this type of application + # that we decrease the priority of out transitions before doing so. This + # is so that when we see 'aa' we stay in the fin machine to match an ident + # of length two and not wrap around to the front to match two idents of + # length one. + clang_main = ( fin $1 %0 )*; + + # This machine matches everything, taking note of newlines. + newline = ( any | '\n' @{ line = line + 1; } )*; + + # The final fsm is the lexer intersected with the newline machine which + # will count lines for us. Since the newline machine accepts everything, + # the strings accepted is goverened by the clang_main machine, onto which + # the newline machine overlays line counting. + main := clang_main & newline; +}%% +/* _____INPUT_____ +"999 0xaAFF99 99.99 /!\n!/ 'lksdj' //\n\"\n\nliteral\n\n\n\"0x00aba foobardd.ddsf 0x0.9\n" +"wordwithnum00asdf\n000wordfollowsnum,makes new symbol\n\nfinishing early /! 
unfinished ...\n" +_____INPUT_____ */ +/* _____OUTPUT_____ +int(1,3): 999 +hex(1,6): aAFF99 +float(1,5): 99.99 +literal(2,5): lksdj +literal(8,12): + +literal + + + +hex(8,5): 00aba +ident(8,8): foobardd +symbol(8,1): . +ident(8,4): ddsf +hex(8,1): 0 +symbol(8,1): . +int(8,1): 9 +ACCEPT +ident(1,17): wordwithnum00asdf +int(2,3): 000 +ident(2,14): wordfollowsnum +symbol(2,1): , +ident(2,5): makes +ident(2,3): new +ident(2,6): symbol +ident(4,9): finishing +ident(4,5): early +FAIL +_____OUTPUT_____ */ + diff --git a/test/cond1.rl b/test/cond1.rl new file mode 100644 index 0000000..7c3ffff --- /dev/null +++ b/test/cond1.rl @@ -0,0 +1,69 @@ +/* + * @LANG: indep + * @ALLOW_GENFLAGS: -T0 -T1 -G0 -G1 -G2 + */ +bool i; +bool j; +bool k; +%% + +%%{ + machine foo; + + action c1 {i} + action c2 {j} + action c3 {k} + action one { prints " one\n";} + action two { prints " two\n";} + action three { prints " three\n";} + + action seti { if ( fc == 48 ) i = false; else i = true; } + action setj { if ( fc == 48 ) j = false; else j = true; } + action setk { if ( fc == 48 ) k = false; else k = true; } + + action break {fbreak;} + + one = 'a' 'b' when c1 'c' @one; + two = 'a'* 'b' when c2 'c' @two; + three = 'a'+ 'b' when c3 'c' @three; + + main := + [01] @seti + [01] @setj + [01] @setk + ( one | two | three ) '\n' @break; + +}%% + +/* _____INPUT_____ +"000abc\n" +"100abc\n" +"010abc\n" +"110abc\n" +"001abc\n" +"101abc\n" +"011abc\n" +"111abc\n" +_____INPUT_____ */ +/* _____OUTPUT_____ +FAIL + one +ACCEPT + two +ACCEPT + one + two +ACCEPT + three +ACCEPT + one + three +ACCEPT + two + three +ACCEPT + one + two + three +ACCEPT +_____OUTPUT_____ */ diff --git a/test/cond2.rl b/test/cond2.rl new file mode 100644 index 0000000..7e49ab8 --- /dev/null +++ b/test/cond2.rl @@ -0,0 +1,91 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using std::cout; +using std::endl; + +%%{ + machine foo; + + action c1 {i} + action c2 {j} + + action one { cout << " one" << endl;} + 
action two { cout << " two" << endl;} + + main := ( + [a-z] | + ('\n' when c1 @one) + )* + ('\n' when c2 @two); +}%% + +%% write data noerror; + +void test( int i, int j, const char *str ) +{ + int cs = foo_start; + const char *p = str; + const char *pe = str + strlen( str ); + + cout << "run:" << endl; + %% write exec; + if ( cs >= foo_first_final ) + cout << " success" << endl; + else + cout << " failure" << endl; + cout << endl; +} + +int main() +{ + test( 0, 0, "hi\n\n" ); + test( 1, 0, "hi\n\n" ); + test( 0, 1, "hi\n" ); + test( 0, 1, "hi\n\n" ); + test( 1, 1, "hi\n" ); + test( 1, 1, "hi\n\n" ); + test( 1, 1, "hi\n\nx" ); + return 0; +} + +#ifdef _____OUTPUT_____ +run: + failure + +run: + one + one + failure + +run: + two + success + +run: + two + failure + +run: + one + two + success + +run: + one + two + one + two + success + +run: + one + two + one + two + failure + +#endif diff --git a/test/cond3.rl b/test/cond3.rl new file mode 100644 index 0000000..80904b5 --- /dev/null +++ b/test/cond3.rl @@ -0,0 +1,59 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using std::cout; +using std::endl; + +%%{ + machine foo; + + action hit_5 {c == 5} + action done { cout << " done" << endl; } + action inc {c++;} + + # The any* includes '\n' when hit_5 is true, so use guarded concatenation. + main := (any @inc)* :> '\n' when hit_5 @done; +}%% + +%% write data noerror; + +void test( const char *str ) +{ + int cs = foo_start; + int c = 0; + const char *p = str; + const char *pe = str + strlen( str ); + + cout << "run:" << endl; + %% write exec; + if ( cs >= foo_first_final ) + cout << " success" << endl; + else + cout << " failure" << endl; + cout << endl; +} + +int main() +{ + test( "12345\n" ); // success + test( "\n2345\n" ); // success, first newline ignored + test( "1234\n" ); // failure, didn't get 5 chars before newline. 
+ return 0; +} + +#ifdef _____OUTPUT_____ +run: + done + success + +run: + done + success + +run: + failure + +#endif diff --git a/test/cond4.rl b/test/cond4.rl new file mode 100644 index 0000000..380c5ff --- /dev/null +++ b/test/cond4.rl @@ -0,0 +1,54 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using std::cout; +using std::endl; + +%%{ + machine foo; + + action c1 {(cout << "c1 ", true)} + action c2 {(cout << "c2 ", true)} + action c3 {(cout << "c3 ", true)} + action c4 {(cout << "c4 ", true)} + + main := ( + 10 .. 60 when c1 | + 20 .. 40 when c2 | + 30 .. 50 when c3 | + 32 .. 38 when c4 | + 0 .. 70 )* ${cout << "char: " << (int)*p << endl;}; +}%% + +%% write data noerror nofinal; + +void test( char *str ) +{ + int len = strlen( str ); + int cs = foo_start; + char *p = str, *pe = str+len; + %% write exec; +} + +char data[] = { 5, 15, 25, 31, 35, 39, 45, 55, 65, 0 }; + +int main() +{ + test( data ); + return 0; +} + +#ifdef _____OUTPUT_____ +char: 5 +c1 char: 15 +c1 c2 char: 25 +c1 c2 c3 char: 31 +c1 c2 c3 c4 char: 35 +c1 c2 c3 char: 39 +c1 c3 char: 45 +c1 char: 55 +char: 65 +#endif diff --git a/test/cond5.rl b/test/cond5.rl new file mode 100644 index 0000000..b6ab4ae --- /dev/null +++ b/test/cond5.rl @@ -0,0 +1,59 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using std::cout; +using std::endl; + +%%{ + machine foo; + write data noerror; +}%% + +void test( const char *str ) +{ + int cs = foo_start; + int c = 0; + const char *p = str; + const char *pe = str + strlen( str ); + char last = '0'; + + cout << "run:"; + %%{ + action d1 { cout << " d1"; } + action see_five { cout << " see_five"; } + + see_five = ([0-9] when{c++ < 5} @d1)* '\n' @see_five; + + action in_sequence { cout << " in_sequence"; } + action d2 { last = *p; cout << " d2"; } + in_sequence = ( [0-9] when { *p == last+1 } @d2 )* '\n' @in_sequence; + + main := ( see_five | in_sequence ) ${cout << " |";}; + + write exec; + }%% + if ( cs < 
foo_first_final ) + cout << " failure"; + cout << endl; +} + +int main() +{ + test( "123456789012\n" ); // fails both + test( "123456789\n" ); // fails five + test( "1234\n" ); // fails five + test( "13245\n" ); // fails sequence + test( "12345\n" ); // succeeds in both + return 0; +} + +#ifdef _____OUTPUT_____ +run: d1 d2 | d1 d2 | d1 d2 | d1 d2 | d1 d2 | d2 | d2 | d2 | d2 | failure +run: d1 d2 | d1 d2 | d1 d2 | d1 d2 | d1 d2 | d2 | d2 | d2 | d2 | in_sequence | +run: d1 d2 | d1 d2 | d1 d2 | d1 d2 | see_five in_sequence | +run: d1 d2 | d1 | d1 | d1 | d1 | see_five | +run: d1 d2 | d1 d2 | d1 d2 | d1 d2 | d1 d2 | see_five in_sequence | +#endif diff --git a/test/cond6.rl b/test/cond6.rl new file mode 100644 index 0000000..ede9ed8 --- /dev/null +++ b/test/cond6.rl @@ -0,0 +1,61 @@ +/* + * @LANG: c++ + */ + +/* Balanced parenthesis with conditions. */ + +#include <iostream> +#include <string.h> +using std::cout; +using std::endl; + +%%{ + machine cond; + write data noerror; +}%% + +void test( const char *str ) +{ + int cs = cond_start, n = 0; + const char *p = str; + const char *pe = str + strlen( str ); + + %%{ + comment = '(' @{n=0;} + ( '('@{n++;} | ')'@{n--;} | [^()] )* + :> ')' when{!n}; + + main := ' '* comment ' '* '\n' @{cout << "success";}; + + write exec; + }%% + if ( cs < cond_first_final ) + cout << "failure"; + cout << endl; +} + +int main() +{ + test( "( ( )\n" ); + test( "()()\n" ); + test( "(((\n" ); + test( "((()\n" ); + test( "((())\n" ); + test( "()\n" ); + test( "((()))\n" ); + test( "(()())\n" ); + test( "((())()(((()))))\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +failure +failure +failure +failure +failure +success +success +success +success +#endif diff --git a/test/cond7.rl b/test/cond7.rl new file mode 100644 index 0000000..a88e67c --- /dev/null +++ b/test/cond7.rl @@ -0,0 +1,82 @@ +/* + * @LANG: indep + */ +int i; +int c; +%% + +%%{ + machine foo; + + action testi {i > 0} + action inc { + i = i - 1; + c = <int>(fc); + prints "item: "; + 
printi c; + prints "\n"; + } + + count = [0-9] @{ + i = <int>(fc - '0'); + prints "count: "; + printi i; + prints "\n"; + }; + + sub = + count # record the number of digits + ( digit when testi @inc )* outwhen !testi; + + main := sub sub '\n'; +}%% + +/* _____INPUT_____ +"00\n" +"019\n" +"190\n" +"1719\n" +"1040000\n" +"104000a\n" +"104000\n" +_____INPUT_____ */ +/* _____OUTPUT_____ +count: 0 +count: 0 +ACCEPT +count: 0 +count: 1 +item: 57 +ACCEPT +count: 1 +item: 57 +count: 0 +ACCEPT +count: 1 +item: 55 +count: 1 +item: 57 +ACCEPT +count: 1 +item: 48 +count: 4 +item: 48 +item: 48 +item: 48 +item: 48 +ACCEPT +count: 1 +item: 48 +count: 4 +item: 48 +item: 48 +item: 48 +FAIL +count: 1 +item: 48 +count: 4 +item: 48 +item: 48 +item: 48 +FAIL +_____OUTPUT_____ */ diff --git a/test/cppscan1.h b/test/cppscan1.h new file mode 100644 index 0000000..346dd9b --- /dev/null +++ b/test/cppscan1.h @@ -0,0 +1,112 @@ +#ifndef _CPPSCAN1_H +#define _CPPSCAN1_H + +#include <iostream> +#include <cstdlib> +#include <cstring> + +using namespace std; + +#define BUFSIZE 2048 + +#define TK_Dlit 192 +#define TK_Slit 193 +#define TK_Float 194 +#define TK_Id 195 +#define TK_NameSep 197 +#define TK_Arrow 211 +#define TK_PlusPlus 212 +#define TK_MinusMinus 213 +#define TK_ArrowStar 214 +#define TK_DotStar 215 +#define TK_ShiftLeft 216 +#define TK_ShiftRight 217 +#define TK_IntegerDecimal 218 +#define TK_IntegerOctal 219 +#define TK_IntegerHex 220 +#define TK_EqualsEquals 223 +#define TK_NotEquals 224 +#define TK_AndAnd 225 +#define TK_OrOr 226 +#define TK_MultAssign 227 +#define TK_DivAssign 228 +#define TK_PercentAssign 229 +#define TK_PlusAssign 230 +#define TK_MinusAssign 231 +#define TK_AmpAssign 232 +#define TK_CaretAssign 233 +#define TK_BarAssign 234 +#define TK_DotDotDot 240 + +/* A growable buffer for collecting headers. 
*/ +struct Buffer +{ + Buffer() : data(0), allocated(0), length(0) { } + Buffer( const Buffer &other ) { + data = (char*)malloc( other.allocated ); + memcpy( data, other.data, other.length ); + allocated = other.allocated; + length = other.length; + } + ~Buffer() { empty(); } + + void append( char p ) { + if ( ++length > allocated ) + upAllocate( length*2 ); + data[length-1] = p; + } + void append( char *str, int len ) { + if ( (length += len) > allocated ) + upAllocate( length*2 ); + memcpy( data+length-len, str, len ); + } + + void clear() { length = 0; } + void upAllocate( int len ); + void empty(); + + char *data; + int allocated; + int length; +}; + + +struct Scanner +{ + Scanner( std::ostream &out ) + : out(out) { } + + std::ostream &out; + + int line, col; + int tokStart; + int inlineDepth; + int count; + Buffer tokBuf; + Buffer nonTokBuf; + + void pass(char c) { nonTokBuf.append(c); } + void buf(char c) { tokBuf.append(c); } + void token( int id ); + + int cs, stack, top; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + void init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + int execute( const char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +#endif diff --git a/test/cppscan1.rl b/test/cppscan1.rl new file mode 100644 index 0000000..92869f7 --- /dev/null +++ b/test/cppscan1.rl @@ -0,0 +1,283 @@ +/* + * @LANG: c++ + * + * Test works with split code gen. 
+ */ + +#include "cppscan1.h" + +%%{ + machine Scanner; + access fsm->; + + action pass { fsm->pass(fc); } + action buf { fsm->buf(fc); } + + action emit_slit { fsm->token( TK_Slit ); } + action emit_dlit { fsm->token( TK_Dlit ); } + action emit_id { fsm->token( TK_Id ); } + action emit_integer_decimal { fsm->token( TK_IntegerDecimal ); } + action emit_integer_octal { fsm->token( TK_IntegerOctal ); } + action emit_integer_hex { fsm->token( TK_IntegerHex ); } + action emit_float { fsm->token( TK_Float ); } + action emit_symbol { fsm->token( fsm->tokBuf.data[0] ); } + action tokst { fsm->tokStart = fsm->col; } + + # Single and double literals. + slit = ( 'L'? ( "'" ( [^'\\\n] | /\\./ )* "'" ) $buf ) >tokst %emit_slit; + dlit = ( 'L'? ( '"' ( [^"\\\n] | /\\./ )* '"' ) $buf ) >tokst %emit_dlit; + + # Identifiers + id = ( [a-zA-Z_] [a-zA-Z0-9_]* ) >tokst $buf %emit_id; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + float = + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) >tokst $buf %emit_float; + + # Integer decimal. Leading part buffered by float. + integer_decimal = ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} $buf ) %emit_integer_decimal; + + # Integer octal. Leading part buffered by float. + integer_octal = ( '0' [0-9]+ [ulUL]{0,2} $buf ) %emit_integer_octal; + + # Integer hex. Leading 0 buffered by float. + integer_hex = ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) $buf ) %emit_integer_hex; + + # Only buffer the second item, first buffered by symbol. 
*/ + namesep = '::' @buf %{fsm->token( TK_NameSep );}; + deqs = '==' @buf %{fsm->token( TK_EqualsEquals );}; + neqs = '!=' @buf %{fsm->token( TK_NotEquals );}; + and_and = '&&' @buf %{fsm->token( TK_AndAnd );}; + or_or = '||' @buf %{fsm->token( TK_OrOr );}; + mult_assign = '*=' @buf %{fsm->token( TK_MultAssign );}; + percent_assign = '%=' @buf %{fsm->token( TK_PercentAssign );}; + plus_assign = '+=' @buf %{fsm->token( TK_PlusAssign );}; + minus_assign = '-=' @buf %{fsm->token( TK_MinusAssign );}; + amp_assign = '&=' @buf %{fsm->token( TK_AmpAssign );}; + caret_assign = '^=' @buf %{fsm->token( TK_CaretAssign );}; + bar_assign = '|=' @buf %{fsm->token( TK_BarAssign );}; + plus_plus = '++' @buf %{fsm->token( TK_PlusPlus );}; + minus_minus = '--' @buf %{fsm->token( TK_MinusMinus );}; + arrow = '->' @buf %{fsm->token( TK_Arrow );}; + arrow_star = '->*' @buf %{fsm->token( TK_ArrowStar );}; + dot_star = '.*' @buf %{fsm->token( TK_DotStar );}; + + # Buffer both items. * + div_assign = '/=' @{fsm->buf('/');fsm->buf(fc);} %{fsm->token( TK_DivAssign );}; + + # Double dot is sent as two dots. + dot_dot = '..' %{fsm->token('.'); fsm->buf('.'); fsm->token('.');}; + + # Three char compounds, first item already buffered. */ + dot_dot_dot = '...' %{fsm->buf('.'); fsm->buf('.'); fsm->token( TK_DotDotDot );}; + + # All compunds + compound = namesep | deqs | neqs | and_and | or_or | mult_assign | + div_assign | percent_assign | plus_assign | minus_assign | + amp_assign | caret_assign | bar_assign | plus_plus | minus_minus | + arrow | arrow_star | dot_star | dot_dot | dot_dot_dot; + + # Single char symbols. + symbol = + ( punct - [./_"'] ) >tokst $buf %emit_symbol | + # Do not immediately buffer slash, may be start of comment. + '/' >tokst %{ fsm->buf('/'); fsm->token( '/' ); } | + # Dot covered by float. + '.' %emit_symbol; + + # Comments and whitespace. 
+ commc = '/*' @{fsm->pass('/'); fsm->pass('*');} ( any* $0 '*/' @1 ) $pass; + commcc = '//' @{fsm->pass('/'); fsm->pass('/');} ( any* $0 '\n' @1 ) $pass; + whitespace = ( any - ( 0 | 33..126 ) )+ $pass; + + action onEOFChar { + /* On EOF char, write out the non token buffer. */ + fsm->nonTokBuf.append(0); + cout << fsm->nonTokBuf.data; + fsm->nonTokBuf.clear(); + } + + # Using 0 as eof. If seeingAs a result all null characters get ignored. + EOF = 0 @onEOFChar; + + # All outside code tokens. + tokens = ( + id | slit | dlit | float | integer_decimal | + integer_octal | integer_hex | compound | symbol ); + nontok = ( commc | commcc | whitespace | EOF ); + + position = ( + '\n' @{ fsm->line += 1; fsm->col = 1; } | + [^\n] @{ fsm->col += 1; } )*; + + main := ( ( tokens | nontok )** ) & position; +}%% + +%% write data; + +void Scanner::init( ) +{ + Scanner *fsm = this; + /* A count of the number of characters in + * a token. Used for % sequences. */ + count = 0; + line = 1; + col = 1; + + %% write init; +} + +int Scanner::execute( const char *data, int len ) +{ + Scanner *fsm = this; + const char *p = data; + const char *pe = data + len; + const char *eof = pe; + + %% write exec; + if ( cs == Scanner_error ) + return -1; + if ( cs >= Scanner_first_final ) + return 1; + return 0; +} + +int Scanner::finish( ) +{ + if ( cs == Scanner_error ) + return -1; + if ( cs >= Scanner_first_final ) + return 1; + return 0; +} + +void Scanner::token( int id ) +{ + /* Leader. */ + if ( nonTokBuf.length > 0 ) { + nonTokBuf.append(0); + cout << nonTokBuf.data; + nonTokBuf.clear(); + } + + /* Token data. 
*/ + tokBuf.append(0); + cout << '<' << id << '>' << tokBuf.data; + tokBuf.clear(); +} + +void Buffer::empty() +{ + if ( data != 0 ) { + free( data ); + + data = 0; + length = 0; + allocated = 0; + } +} + +void Buffer::upAllocate( int len ) +{ + if ( data == 0 ) + data = (char*) malloc( len ); + else + data = (char*) realloc( data, len ); + allocated = len; +} + +void test( const char *buf ) +{ + Scanner scanner(cout); + scanner.init(); + scanner.execute( buf, strlen(buf) ); + + /* The last token is ignored (because there is no next token). Send + * trailing null to force the last token into whitespace. */ + char eof = 0; + if ( scanner.execute( &eof, 1 ) <= 0 ) { + cerr << "cppscan: scan failed" << endl; + return; + } + cout.flush(); +} + +int main() +{ + test( + "/*\n" + " * Copyright \n" + " */\n" + "\n" + "/* Construct an fsmmachine from a graph. */\n" + "RedFsmAp::RedFsmAp( FsmAp *graph, bool complete )\n" + ":\n" + " graph(graph),\n" + "{\n" + " assert( sizeof(RedTransAp) <= sizeof(TransAp) );\n" + "\n" + " reduceMachine();\n" + "}\n" + "\n" + "{\n" + " /* Get the transition that we want to extend. */\n" + " RedTransAp *extendTrans = list[pos].value;\n" + "\n" + " /* Look ahead in the transition list. */\n" + " for ( int next = pos + 1; next < list.length(); pos++, next++ ) {\n" + " if ( ! keyOps->eq( list[pos].highKey, nextKey ) )\n" + " break;\n" + " }\n" + " return false;\n" + "}\n" + "\n" ); + + test( + "->*\n" + ".*\n" + "/*\"*/\n" + "\"/*\"\n" + "L'\"'\n" + "L\"'\"\n" ); + + return 0; +} + +#ifdef _____OUTPUT_____ +/* + * Copyright + */ + +/* Construct an fsmmachine from a graph. */ +<195>RedFsmAp<197>::<195>RedFsmAp<40>( <195>FsmAp <42>*<195>graph<44>, <195>bool <195>complete <41>) +<58>: + <195>graph<40>(<195>graph<41>)<44>, +<123>{ + <195>assert<40>( <195>sizeof<40>(<195>RedTransAp<41>) <60><<61>= <195>sizeof<40>(<195>TransAp<41>) <41>)<59>; + + <195>reduceMachine<40>(<41>)<59>; +<125>} + +<123>{ + /* Get the transition that we want to extend. 
*/ + <195>RedTransAp <42>*<195>extendTrans <61>= <195>list<91>[<195>pos<93>]<46>.<195>value<59>; + + /* Look ahead in the transition list. */ + <195>for <40>( <195>int <195>next <61>= <195>pos <43>+ <218>1<59>; <195>next <60>< <195>list<46>.<195>length<40>(<41>)<59>; <195>pos<212>++<44>, <195>next<212>++ <41>) <123>{ + <195>if <40>( <33>! <195>keyOps<211>-><195>eq<40>( <195>list<91>[<195>pos<93>]<46>.<195>highKey<44>, <195>nextKey <41>) <41>) + <195>break<59>; + <125>} + <195>return <195>false<59>; +<125>} + +<214>->* +<215>.* +/*"*/ +<192>"/*" +<193>L'"' +<192>L"'" +#endif diff --git a/test/cppscan2.rl b/test/cppscan2.rl new file mode 100644 index 0000000..a609bba --- /dev/null +++ b/test/cppscan2.rl @@ -0,0 +1,404 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using namespace std; + +#define TK_Dlit 192 +#define TK_Slit 193 +#define TK_Float 194 +#define TK_Id 195 +#define TK_NameSep 197 +#define TK_Arrow 211 +#define TK_PlusPlus 212 +#define TK_MinusMinus 213 +#define TK_ArrowStar 214 +#define TK_DotStar 215 +#define TK_ShiftLeft 216 +#define TK_ShiftRight 217 +#define TK_IntegerDecimal 218 +#define TK_IntegerOctal 219 +#define TK_IntegerHex 220 +#define TK_EqualsEquals 223 +#define TK_NotEquals 224 +#define TK_AndAnd 225 +#define TK_OrOr 226 +#define TK_MultAssign 227 +#define TK_DivAssign 228 +#define TK_PercentAssign 229 +#define TK_PlusAssign 230 +#define TK_MinusAssign 231 +#define TK_AmpAssign 232 +#define TK_CaretAssign 233 +#define TK_BarAssign 234 +#define TK_DotDotDot 240 +#define TK_Whitespace 241 +#define TK_Comment 242 + +#define BUFSIZE 4096 + +int tok; +char buf[BUFSIZE]; +const char *ts, *te; +void token( const char *data, int len ); +bool discard = false; + +struct Scanner +{ + int cs; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. 
+ int init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + int execute( const char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +%%{ + machine Scanner; + + # Single and double literals. + slit = ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) @{tok = TK_Slit;}; + dlit = ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) @{tok = TK_Dlit;}; + + # Identifiers + id = ( [a-zA-Z_] [a-zA-Z0-9_]* ) @{tok = TK_Id;}; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + float = + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) @{tok = TK_Float;}; + + # Integer decimal. Leading part buffered by float. + integer_decimal = ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) @{tok = TK_IntegerDecimal;}; + + # Integer octal. Leading part buffered by float. + integer_octal = ( '0' [0-9]+ [ulUL]{0,2} ) @{tok = TK_IntegerOctal;}; + + # Integer hex. Leading 0 buffered by float. + integer_hex = ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) @{tok = TK_IntegerHex;}; + + # Only buffer the second item, first buffered by symbol. 
*/ + namesep = '::' @{tok = TK_NameSep;}; + deqs = '==' @{tok = TK_EqualsEquals;}; + neqs = '!=' @{tok = TK_NotEquals;}; + and_and = '&&' @{tok = TK_AndAnd;}; + or_or = '||' @{tok = TK_OrOr;}; + mult_assign = '*=' @{tok = TK_MultAssign;}; + div_assign = '/=' @{tok = TK_DivAssign;}; + percent_assign = '%=' @{tok = TK_PercentAssign;}; + plus_assign = '+=' @{tok = TK_PlusAssign;}; + minus_assign = '-=' @{tok = TK_MinusAssign;}; + amp_assign = '&=' @{tok = TK_AmpAssign;}; + caret_assign = '^=' @{tok = TK_CaretAssign;}; + bar_assign = '|=' @{tok = TK_BarAssign;}; + plus_plus = '++' @{tok = TK_PlusPlus;}; + minus_minus = '--' @{tok = TK_MinusMinus;}; + arrow = '->' @{tok = TK_Arrow;}; + arrow_star = '->*' @{tok = TK_ArrowStar;}; + dot_star = '.*' @{tok = TK_DotStar;}; + + # Three char compounds, first item already buffered. */ + dot_dot_dot = '...' @{tok = TK_DotDotDot;}; + + # All compunds + compound = namesep | deqs | neqs | and_and | or_or | mult_assign | + div_assign | percent_assign | plus_assign | minus_assign | + amp_assign | caret_assign | bar_assign | plus_plus | minus_minus | + arrow | arrow_star | dot_star | dot_dot_dot; + + # Single char symbols. + symbol = ( punct - [_"'] ) @{tok = fc;}; + + action discard { + discard = true; + } + + # Comments and whitespace. + commc = '/*' @discard ( any* $0 '*/' @1 ) @{tok = TK_Comment;}; + commcc = '//' @discard ( any* $0 '\n' @1 ) @{tok = TK_Comment;}; + whitespace = ( any - 33..126 )+ >discard @{tok = TK_Whitespace;}; + + # All outside code tokens. + tokens = ( + id | slit | dlit | float | integer_decimal | + integer_octal | integer_hex | compound | symbol | + commc | commcc | whitespace ); + + action onError { + if ( tok != 0 ) { + const char *rst_data; + + if ( tok == TK_Comment || tok == TK_Whitespace ) { + /* Reset comment status, don't send. */ + discard = false; + + /* Restart right at the error point if consuming whitespace or + * a comment. Consume may have spanned multiple buffers. 
*/ + rst_data = fpc; + } + else { + /* Send the token. */ + token( ts, te - ts + 1 ); + + /* Restart right after the token. */ + rst_data = te+1; + } + + ts = 0; + fexec rst_data; + fgoto main; + } + } + + main := tokens >{ts=fpc;} @{te=fpc;} $!onError; +}%% + +%% write data; + +int Scanner::init( ) +{ + tok = 0; + ts = 0; + te = 0; + + %% write init; + return 1; +} + +int Scanner::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + const char *eof = pe; + + %% write exec; + + if ( cs == Scanner_error ) + return -1; + if ( cs >= Scanner_first_final ) + return 1; + return 0; +} + +int Scanner::finish( ) +{ + if ( cs == Scanner_error ) + return -1; + if ( cs >= Scanner_first_final ) + return 1; + return 0; +} + + +void token( const char *data, int len ) +{ + cout << "<" << tok << "> "; + for ( int i = 0; i < len; i++ ) + cout << data[i]; + cout << '\n'; +} + +void test( const char * data ) +{ + Scanner scanner; + scanner.init(); + scanner.execute( data, strlen(data) ); + scanner.finish(); + if ( tok != 0 && tok != TK_Comment && tok != TK_Whitespace ) + token( ts, te - ts + 1 ); +} + +int main() +{ + test( + "/*\n" + " * Copyright \n" + " */\n" + "\n" + "\n" + "/* Move ranges to the singles list. 
*/\n" + "void RedFsmAp::move( RedStateAp *state )\n" + "{\n" + " RedTranst &range = state->outRange;\n" + " for ( int rpos = 0; rpos < range.length(); ) {\n" + " if ( can( range, rpos ) ) {\n" + " while ( range[rpos].value != range[rpos+1].value ) {\n" + " single.append( range[rpos+1] );\n" + " }\n" + " \n" + " range[rpos].highKey = range[rpos+1].highKey;\n" + " }\n" + " else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) {\n" + " single.append( range[rpos] );\n" + " }\n" + " }\n" + "}\n" + "\n" ); + + test( + "->*\n" + ".*\n" + "/*\"*/\n" + "\"/*\"\n" + "L'\"'\n" + "L\"'\"\n" + "...\n" ); +} + +#ifdef _____OUTPUT_____ +<195> void +<195> RedFsmAp +<197> :: +<195> move +<40> ( +<195> RedStateAp +<42> * +<195> state +<41> ) +<123> { +<195> RedTranst +<38> & +<195> range +<61> = +<195> state +<211> -> +<195> outRange +<59> ; +<195> for +<40> ( +<195> int +<195> rpos +<61> = +<218> 0 +<59> ; +<195> rpos +<60> < +<195> range +<46> . +<195> length +<40> ( +<41> ) +<59> ; +<41> ) +<123> { +<195> if +<40> ( +<195> can +<40> ( +<195> range +<44> , +<195> rpos +<41> ) +<41> ) +<123> { +<195> while +<40> ( +<195> range +<91> [ +<195> rpos +<93> ] +<46> . +<195> value +<224> != +<195> range +<91> [ +<195> rpos +<43> + +<218> 1 +<93> ] +<46> . +<195> value +<41> ) +<123> { +<195> single +<46> . +<195> append +<40> ( +<195> range +<91> [ +<195> rpos +<43> + +<218> 1 +<93> ] +<41> ) +<59> ; +<125> } +<195> range +<91> [ +<195> rpos +<93> ] +<46> . +<195> highKey +<61> = +<195> range +<91> [ +<195> rpos +<43> + +<218> 1 +<93> ] +<46> . +<195> highKey +<59> ; +<125> } +<195> else +<195> if +<40> ( +<195> keyOps +<211> -> +<195> span +<40> ( +<195> range +<91> [ +<195> rpos +<93> ] +<46> . +<195> lowKey +<44> , +<195> range +<91> [ +<195> rpos +<93> ] +<46> . +<195> highKey +<41> ) +<223> == +<218> 1 +<41> ) +<123> { +<195> single +<46> . 
+<195> append +<40> ( +<195> range +<91> [ +<195> rpos +<93> ] +<41> ) +<59> ; +<125> } +<125> } +<125> } +<214> ->* +<215> .* +<192> "/*" +<193> L'"' +<192> L"'" +<240> ... +#endif diff --git a/test/cppscan3.rl b/test/cppscan3.rl new file mode 100644 index 0000000..67b8624 --- /dev/null +++ b/test/cppscan3.rl @@ -0,0 +1,285 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using namespace std; + +#define TK_Dlit 192 +#define TK_Slit 193 +#define TK_Float 194 +#define TK_Id 195 +#define TK_NameSep 197 +#define TK_Arrow 211 +#define TK_PlusPlus 212 +#define TK_MinusMinus 213 +#define TK_ArrowStar 214 +#define TK_DotStar 215 +#define TK_ShiftLeft 216 +#define TK_ShiftRight 217 +#define TK_IntegerDecimal 218 +#define TK_IntegerOctal 219 +#define TK_IntegerHex 220 +#define TK_EqualsEquals 223 +#define TK_NotEquals 224 +#define TK_AndAnd 225 +#define TK_OrOr 226 +#define TK_MultAssign 227 +#define TK_DivAssign 228 +#define TK_PercentAssign 229 +#define TK_PlusAssign 230 +#define TK_MinusAssign 231 +#define TK_AmpAssign 232 +#define TK_CaretAssign 233 +#define TK_BarAssign 234 +#define TK_DotDotDot 240 +#define TK_Whitespace 241 +#define TK_Comment 242 + +#define BUFSIZE 4096 + +char buf[BUFSIZE]; + +struct Scanner +{ + int cs, act; + const char *ts, *te; + + void token( int tok ); + void run(); + + void init( ); + void execute( const char *data, int len ); + int finish( ); +}; + +%%{ + machine Scanner; + + main := |* + + # Single and double literals. + ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) + => { token( TK_Slit );}; + ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) + => { token( TK_Dlit );}; + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) + =>{ token( TK_Id );}; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) + => { token( TK_Float );}; + + # Integer decimal. Leading part buffered by float. 
+ ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) + => { token( TK_IntegerDecimal );}; + + # Integer octal. Leading part buffered by float. + ( '0' [0-9]+ [ulUL]{0,2} ) + => { token( TK_IntegerOctal );}; + + # Integer hex. Leading 0 buffered by float. + ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) + => { token( TK_IntegerHex );}; + + # Only buffer the second item, first buffered by symbol. */ + '::' => {token( TK_NameSep );}; + '==' => {token( TK_EqualsEquals );}; + '!=' => {token( TK_NotEquals );}; + '&&' => {token( TK_AndAnd );}; + '||' => {token( TK_OrOr );}; + '*=' => {token( TK_MultAssign );}; + '/=' => {token( TK_DivAssign );}; + '%=' => {token( TK_PercentAssign );}; + '+=' => {token( TK_PlusAssign );}; + '-=' => {token( TK_MinusAssign );}; + '&=' => {token( TK_AmpAssign );}; + '^=' => {token( TK_CaretAssign );}; + '|=' => {token( TK_BarAssign );}; + '++' => {token( TK_PlusPlus );}; + '--' => {token( TK_MinusMinus );}; + '->' => {token( TK_Arrow );}; + '->*' => {token( TK_ArrowStar );}; + '.*' => {token( TK_DotStar );}; + + # Three char compounds, first item already buffered. */ + '...' => { token( TK_DotDotDot );}; + + # Single char symbols. + ( punct - [_"'] ) => { token( ts[0] );}; + + action comment { + token( TK_Comment ); + } + + # Comments and whitespace. 
+ '/*' ( any* $0 '*/' @1 ) => comment; + '//' ( any* $0 '\n' @1 ) => comment; + ( any - 33..126 )+ => { token( TK_Whitespace );}; + + *|; +}%% + +%% write data; + +void Scanner::init( ) +{ + %% write init; +} + +/* Runs the scanner over the given block, then prints the final offset + * of p from the start of data. Nothing is returned. */ +void Scanner::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + const char *eof = pe; + + %% write exec; + + cout << "P: " << (p - data) << endl; +} + +int Scanner::finish( ) +{ + if ( cs == Scanner_error ) + return -1; + if ( cs >= Scanner_first_final ) + return 1; + return 0; +} + + +void Scanner::token( int tok ) +{ + const char *data = ts; + int len = te - ts; + cout << "<" << tok << "> "; + for ( int i = 0; i < len; i++ ) + cout << data[i]; + cout << '\n'; +} + +void test( const char *buf ) +{ + int len = strlen( buf ); + std::ios::sync_with_stdio(false); + Scanner scanner; + scanner.init(); + + scanner.execute( buf, len ); + if ( scanner.cs == Scanner_error ) { + /* Machine failed before finding a token. */ + cout << "PARSE ERROR" << endl; + } + + /* FIXME: Last token may get lost. */ + scanner.finish(); +} + +int main() +{ + test( + "\"\\\"hi\" /*\n" + "*/\n" + "44 .44\n" + "44. 44\n" + "44 . 44\n" + "44.44\n" + "_hithere22" + ); + + test( + "'\\''\"\\n\\d'\\\"\"\n" + "hi\n" + "99\n" + ".99\n" + "99e-4\n" + "->*\n" + "||\n" + "0x98\n" + "0x\n" + "//\n" + "/* * */" + ); + + test( + "'\n" + "'\n" + ); + +} + +#ifdef _____OUTPUT_____ +<192> "\"hi" +<241> +<242> /* +*/ +<241> + +<218> 44 +<241> +<194> .44 +<241> + +<194> 44. +<241> +<218> 44 +<241> + +<218> 44 +<241> +<46> . 
+<241> +<218> 44 +<241> + +<194> 44.44 +<241> + +<195> _hithere22 +P: 51 +<193> '\'' +<192> "\n\d'\"" +<241> + +<195> hi +<241> + +<218> 99 +<241> + +<194> .99 +<241> + +<194> 99e-4 +<241> + +<214> ->* +<241> + +<226> || +<241> + +<220> 0x98 +<241> + +<218> 0 +<195> x +<241> + +<242> // + +<242> /* * */ +P: 55 +P: 1 +PARSE ERROR +#endif diff --git a/test/cppscan4.rl b/test/cppscan4.rl new file mode 100644 index 0000000..42a97f1 --- /dev/null +++ b/test/cppscan4.rl @@ -0,0 +1,302 @@ +/* + * @LANG: d + */ + +module cppscan; + +import std.c.stdio; +import std.string; + +const int BUFSIZE = 2048; + +const int TK_Dlit = 192; +const int TK_Slit = 193; +const int TK_Float = 194; +const int TK_Id = 195; +const int TK_NameSep = 197; +const int TK_Arrow = 211; +const int TK_PlusPlus = 212; +const int TK_MinusMinus = 213; +const int TK_ArrowStar = 214; +const int TK_DotStar = 215; +const int TK_ShiftLeft = 216; +const int TK_ShiftRight = 217; +const int TK_IntegerDecimal = 218; +const int TK_IntegerOctal = 219; +const int TK_IntegerHex = 220; +const int TK_EqualsEquals = 223; +const int TK_NotEquals = 224; +const int TK_AndAnd = 225; +const int TK_OrOr = 226; +const int TK_MultAssign = 227; +const int TK_DivAssign = 228; +const int TK_PercentAssign = 229; +const int TK_PlusAssign = 230; +const int TK_MinusAssign = 231; +const int TK_AmpAssign = 232; +const int TK_CaretAssign = 233; +const int TK_BarAssign = 234; +const int TK_DotDotDot = 240; + + +class Scanner +{ + int line, col; + int tokStart; + int inlineDepth; + int count; + char[] tokBuf; + char[] nonTokBuf; + + void pass(char c) { nonTokBuf ~= c; } + void buf(char c) { tokBuf ~= c; } + void token( int id ) + { + /* Leader. */ + if ( nonTokBuf.length > 0 ) { + printf("%.*s", nonTokBuf); + nonTokBuf = ""; + } + + /* Token data. 
*/ + printf("<%d>%.*s", id, tokBuf); + + tokBuf = ""; + } + + int cs, stack, top; + + %%{ + machine Scanner; + + action pass { pass(fc); } + action buf { buf(fc); } + + action emit_slit { token( TK_Slit ); } + action emit_dlit { token( TK_Dlit ); } + action emit_id { token( TK_Id ); } + action emit_integer_decimal { token( TK_IntegerDecimal ); } + action emit_integer_octal { token( TK_IntegerOctal ); } + action emit_integer_hex { token( TK_IntegerHex ); } + action emit_float { token( TK_Float ); } + action emit_symbol { token( tokBuf[0] ); } + action tokst { tokStart = col; } + + # Single and double literals. + slit = ( 'L'? ( "'" ( [^'\\\n] | /\\./ )* "'" ) $buf ) >tokst %emit_slit; + dlit = ( 'L'? ( '"' ( [^"\\\n] | /\\./ )* '"' ) $buf ) >tokst %emit_dlit; + + # Identifiers + id = ( [a-zA-Z_] [a-zA-Z0-9_]* ) >tokst $buf %emit_id; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + float = + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) >tokst $buf %emit_float; + + # Integer decimal. Leading part buffered by float. + integer_decimal = ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} $buf ) %emit_integer_decimal; + + # Integer octal. Leading part buffered by float. + integer_octal = ( '0' [0-9]+ [ulUL]{0,2} $buf ) %emit_integer_octal; + + # Integer hex. Leading 0 buffered by float. + integer_hex = ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) $buf ) %emit_integer_hex; + + # Only buffer the second item, first buffered by symbol. 
*/ + namesep = '::' @buf %{token( TK_NameSep );}; + deqs = '==' @buf %{token( TK_EqualsEquals );}; + neqs = '!=' @buf %{token( TK_NotEquals );}; + and_and = '&&' @buf %{token( TK_AndAnd );}; + or_or = '||' @buf %{token( TK_OrOr );}; + mult_assign = '*=' @buf %{token( TK_MultAssign );}; + percent_assign = '%=' @buf %{token( TK_PercentAssign );}; + plus_assign = '+=' @buf %{token( TK_PlusAssign );}; + minus_assign = '-=' @buf %{token( TK_MinusAssign );}; + amp_assign = '&=' @buf %{token( TK_AmpAssign );}; + caret_assign = '^=' @buf %{token( TK_CaretAssign );}; + bar_assign = '|=' @buf %{token( TK_BarAssign );}; + plus_plus = '++' @buf %{token( TK_PlusPlus );}; + minus_minus = '--' @buf %{token( TK_MinusMinus );}; + arrow = '->' @buf %{token( TK_Arrow );}; + arrow_star = '->*' @buf %{token( TK_ArrowStar );}; + dot_star = '.*' @buf %{token( TK_DotStar );}; + + # Buffer both items. * + div_assign = '/=' @{buf('/');buf(fc);} %{token( TK_DivAssign );}; + + # Double dot is sent as two dots. + dot_dot = '..' %{token('.'); buf('.'); token('.');}; + + # Three char compounds, first item already buffered. */ + dot_dot_dot = '...' %{buf('.'); buf('.'); token( TK_DotDotDot );}; + + # All compunds + compound = namesep | deqs | neqs | and_and | or_or | mult_assign | + div_assign | percent_assign | plus_assign | minus_assign | + amp_assign | caret_assign | bar_assign | plus_plus | minus_minus | + arrow | arrow_star | dot_star | dot_dot | dot_dot_dot; + + # Single char symbols. + symbol = + ( punct - [./_"'] ) >tokst $buf %emit_symbol | + # Do not immediately buffer slash, may be start of comment. + '/' >tokst %{ buf('/'); token( '/' ); } | + # Dot covered by float. + '.' %emit_symbol; + + # Comments and whitespace. + commc = '/*' @{pass('/'); pass('*');} ( any* $0 '*/' @1 ) $pass; + commcc = '//' @{pass('/'); pass('/');} ( any* $0 '\n' @1 ) $pass; + whitespace = ( any - ( 0 | 33..126 ) )+ $pass; + + action onEOFChar { + /* On EOF char, write out the non token buffer. 
*/ + printf("%.*s", nonTokBuf); + nonTokBuf = ""; + } + + # Using 0 as eof. As a result all null characters get ignored. + EOF = 0 @onEOFChar; + + # All outside code tokens. + tokens = ( + id | slit | dlit | float | integer_decimal | + integer_octal | integer_hex | compound | symbol ); + nontok = ( commc | commcc | whitespace | EOF ); + + position = ( + '\n' @{ line += 1; col = 1; } | + [^\n] @{ col += 1; } )*; + + main := ( ( tokens | nontok )** ) & position; + }%% + + %% write data noprefix; + + void init( ) + { + /* A count of the number of characters in + * a token. Used for % sequences. */ + count = 0; + line = 1; + col = 1; + %% write init; + return 1; + } + + int execute( char* _data, int _len ) + { + char *p = _data; + char *pe = _data + _len; + char *eof = null; + + %% write exec; + + if ( cs == error ) + return -1; + if ( cs >= first_final ) + return 1; + return 0; + } + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ) + { + if ( cs == error ) + return -1; + if ( cs >= first_final ) + return 1; + return 0; + } +}; + +void test(char[] buf) +{ + Scanner scanner = new Scanner(); + scanner.init(); + scanner.execute( buf.ptr, buf.length ); + + /* The last token is ignored (because there is no next token). Send + * trailing null to force the last token into whitespace. 
*/ + char eof_char = 0; + if ( scanner.execute( &eof_char, 1 ) <= 0 ) { + fprintf(stderr, "cppscan: scan failed\n"); + } +} + +int main() +{ + test( + "/*\n" + " * Copyright \n" + " */\n" + "\n" + "RedTransAp *RedFsmAp::reduceTrans( TransAp *trans )\n" + "{\n" + " RedAction *action = 0;\n" + " if ( trans->actionTable.length() > 0 ) {\n" + " if ( actionMap.insert( trans->actionTable, &action ) )\n" + " action->id = nextActionId++;\n" + " }\n" + " \n" + " RedStateAp *targ = (RedStateAp*)trans->toState;\n" + " if ( action == 0 ) {\n" + " delete trans;\n" + " return 0;\n" + " }\n" + "\n" + " trans->~TransAp();\n" + " inDict = new(trans) RedTransAp( targ, action, nextTransId++ );\n" + " transSet.insert( inDict );\n" + "}\n" + ); + + test( + "->*\n" + ".*\n" + "/*\"*/\n" + "\"/*\"\n" + "L'\"'\n" + "L\"'\"\n" + ); + + return 0; +} + +/+ _____OUTPUT_____ +/* + * Copyright + */ + +<195>RedTransAp <42>*<195>RedFsmAp<197>::<195>reduceTrans<40>( <195>TransAp <42>*<195>trans <41>) +<123>{ + <195>RedAction <42>*<195>action <61>= <218>0<59>; + <195>if <40>( <195>trans<211>-><195>actionTable<46>.<195>length<40>(<41>) <62>> <218>0 <41>) <123>{ + <195>if <40>( <195>actionMap<46>.<195>insert<40>( <195>trans<211>-><195>actionTable<44>, <38>&<195>action <41>) <41>) + <195>action<211>-><195>id <61>= <195>nextActionId<212>++<59>; + <125>} + + <195>RedStateAp <42>*<195>targ <61>= <40>(<195>RedStateAp<42>*<41>)<195>trans<211>-><195>toState<59>; + <195>if <40>( <195>action <223>== <218>0 <41>) <123>{ + <195>delete <195>trans<59>; + <195>return <218>0<59>; + <125>} + + <195>trans<211>-><126>~<195>TransAp<40>(<41>)<59>; + <195>inDict <61>= <195>new<40>(<195>trans<41>) <195>RedTransAp<40>( <195>targ<44>, <195>action<44>, <195>nextTransId<212>++ <41>)<59>; + <195>transSet<46>.<195>insert<40>( <195>inDict <41>)<59>; +<125>} +<214>->* +<215>.* +/*"*/ +<192>"/*" +<193>L'"' +<192>L"'" ++++++++++++++++++/ diff --git a/test/cppscan5.rl b/test/cppscan5.rl new file mode 100644 index 0000000..057725a 
--- /dev/null +++ b/test/cppscan5.rl @@ -0,0 +1,275 @@ +/* + * @LANG: d + */ + +/* + * Test in and out state actions. + */ + +import std.c.stdio; +import std.string; + +static const int TK_Dlit = 192; +static const int TK_Slit = 193; +static const int TK_Float = 194; +static const int TK_Id = 195; +static const int TK_NameSep = 197; +static const int TK_Arrow = 211; +static const int TK_PlusPlus = 212; +static const int TK_MinusMinus = 213; +static const int TK_ArrowStar = 214; +static const int TK_DotStar = 215; +static const int TK_ShiftLeft = 216; +static const int TK_ShiftRight = 217; +static const int TK_IntegerDecimal = 218; +static const int TK_IntegerOctal = 219; +static const int TK_IntegerHex = 220; +static const int TK_EqualsEquals = 223; +static const int TK_NotEquals = 224; +static const int TK_AndAnd = 225; +static const int TK_OrOr = 226; +static const int TK_MultAssign = 227; +static const int TK_DivAssign = 228; +static const int TK_PercentAssign = 229; +static const int TK_PlusAssign = 230; +static const int TK_MinusAssign = 231; +static const int TK_AmpAssign = 232; +static const int TK_CaretAssign = 233; +static const int TK_BarAssign = 234; +static const int TK_DotDotDot = 240; +static const int TK_Whitespace = 241; +static const int TK_Comment = 242; + +class Scanner +{ + int cs, act; + char *ts, te; + + void token( int tok ) + { + char *data = ts; + int len = te - ts; + printf( "<%i> ", tok ); + for ( int i = 0; i < len; i++ ) + printf( "%c", data[i] ); + printf( "\n" ); + } + + %%{ + + machine Scanner; + + main := |* + + # Single and double literals. + ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) + => { token( TK_Slit );}; + ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) + => { token( TK_Dlit );}; + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) + =>{ token( TK_Id );}; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + + ( fract_const exponent? float_suffix? 
| + digit+ exponent float_suffix? ) + => { token( TK_Float );}; + + # Integer decimal. Leading part buffered by float. + ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) + => { token( TK_IntegerDecimal );}; + + # Integer octal. Leading part buffered by float. + ( '0' [0-9]+ [ulUL]{0,2} ) + => { token( TK_IntegerOctal );}; + + # Integer hex. Leading 0 buffered by float. + ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) + => { token( TK_IntegerHex );}; + + # Only buffer the second item, first buffered by symbol. */ + '::' => {token( TK_NameSep );}; + '==' => {token( TK_EqualsEquals );}; + '!=' => {token( TK_NotEquals );}; + '&&' => {token( TK_AndAnd );}; + '||' => {token( TK_OrOr );}; + '*=' => {token( TK_MultAssign );}; + '/=' => {token( TK_DivAssign );}; + '%=' => {token( TK_PercentAssign );}; + '+=' => {token( TK_PlusAssign );}; + '-=' => {token( TK_MinusAssign );}; + '&=' => {token( TK_AmpAssign );}; + '^=' => {token( TK_CaretAssign );}; + '|=' => {token( TK_BarAssign );}; + '++' => {token( TK_PlusPlus );}; + '--' => {token( TK_MinusMinus );}; + '->' => {token( TK_Arrow );}; + '->*' => {token( TK_ArrowStar );}; + '.*' => {token( TK_DotStar );}; + + # Three char compounds, first item already buffered. */ + '...' => { token( TK_DotDotDot );}; + + # Single char symbols. + ( punct - [_"'] ) => { token( ts[0] );}; + + action comment { + token( TK_Comment ); + } + + # Comments and whitespace. + '/*' ( any* $0 '*/' @1 ) => comment; + '//' ( any* $0 '\n' @1 ) => comment; + ( any - 33..126 )+ => { token( TK_Whitespace );}; + + *|; + + }%% + + %% write data noprefix; + + void init( ) + { + %% write init; + } + + void execute( char* data, int len ) + { + char *p = data; + char *pe = data + len; + char *eof = pe; + + %% write exec; + } + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. 
+ int finish( ) + { + if ( cs == error ) + return -1; + if ( cs >= first_final ) + return 1; + return 0; + } +}; + +static const int BUFSIZE = 12; + +void test( char buf[] ) +{ + Scanner scanner = new Scanner(); + scanner.init(); + + scanner.execute( buf.ptr, buf.length ); + if ( scanner.cs == Scanner.error ) { + /* Machine failed before finding a token. */ + printf("PARSE ERROR\n"); + } + scanner.finish(); + return 0; +} + +int main() +{ + test( + "\"\\\"hi\" /*\n" + "*/\n" + "44 .44\n" + "44. 44\n" + "44 . 44\n" + "44.44\n" + "_hithere22" + ); + + test( + "'\\''\"\\n\\d'\\\"\"\n" + "hi\n" + "99\n" + ".99\n" + "99e-4\n" + "->*\n" + "||\n" + "0x98\n" + "0x\n" + "//\n" + "/* * */" + ); + + test( + "'\n" + "'\n" + ); + + return 0; +} + +/+ _____OUTPUT_____ +<192> "\"hi" +<241> +<242> /* +*/ +<241> + +<218> 44 +<241> +<194> .44 +<241> + +<194> 44. +<241> +<218> 44 +<241> + +<218> 44 +<241> +<46> . +<241> +<218> 44 +<241> + +<194> 44.44 +<241> + +<195> _hithere22 +<193> '\'' +<192> "\n\d'\"" +<241> + +<195> hi +<241> + +<218> 99 +<241> + +<194> .99 +<241> + +<194> 99e-4 +<241> + +<214> ->* +<241> + +<226> || +<241> + +<220> 0x98 +<241> + +<218> 0 +<195> x +<241> + +<242> // + +<242> /* * */ +PARSE ERROR ++++++++++++++++++++/ diff --git a/test/cppscan6.rl b/test/cppscan6.rl new file mode 100644 index 0000000..ad2d266 --- /dev/null +++ b/test/cppscan6.rl @@ -0,0 +1,358 @@ +/* + * @LANG: indep + * + * const char *data = ts; + * int len = te - ts; + * cout << "<" << tok << "> "; + * for ( int i = 0; i < len; i++ ) + * cout << data[i]; + * cout << '\n'; + */ +ptr ts; +ptr te; +int act; +int token; +%% +%%{ + machine scanner; + + action comment { + token = 242; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + } + + + main := |* + + # Single and double literals. + ( 'L'? "'" ( [^'\\\n] | '\\' any )* "'" ) + => { + token = 193; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + ( 'L'? 
'"' ( [^"\\\n] | '\\' any )* '"' ) + => { + token = 192; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) + =>{ + token = 195; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) + => { + token = 194; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + + # Integer decimal. Leading part buffered by float. + ( ( '0' | [1-9] [0-9]* ) [ulUL]? ) + => { + token = 218; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + + # Integer octal. Leading part buffered by float. + ( '0' [0-9]+ [ulUL]? ) + => { + token = 219; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + + # Integer hex. Leading 0 buffered by float. + ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]? ) ) + => { + token = 220; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + + # Only buffer the second item, first buffered by symbol. 
+ '::' => { + token = 197; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '==' => { + token = 223; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '!=' => { + token = 224; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '&&' => { + token = 225; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '||' => { + token = 226; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '*=' => { + token = 227; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '/=' => { + token = 228; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '%=' => { + token = 229; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '+=' => { + token = 230; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '-=' => { + token = 231; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '&=' => { + token = 232; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '^=' => { + token = 233; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '|=' => { + token = 234; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '++' => { + token = 212; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '--' => { + token = 213; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '->' => { + token = 211; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '->*' => { + token = 214; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + '.*' => { + token = 215; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + + # Three char compounds, first item already buffered. + '...' 
=> { + token = 240; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + + # Single char symbols. + ( punct - [_"'] ) => { + token = <int>(first_token_char); + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + + # Comments and whitespace. + '/!' ( any* $0 '!/' @1 ) => comment; + '//' ( any* $0 '\n' @1 ) => comment; + ( any - 33..126 )+ => { + token = 241; + prints "<"; + printi token; + prints "> "; + print_token; + prints "\n"; + }; + *|; +}%% +/* _____INPUT_____ +"\"\\\"hi\" /!\n!/\n44 .44\n44. 44\n44 . 44\n44.44\n_hithere22" +"'\\''\"\\n\\d'\\\"\"\nhi\n99\n.99\n99e-4\n->*\n||\n0x98\n0x\n//\n/! * !/" +"'\n'\n" +_____INPUT_____ */ +/* _____OUTPUT_____ +<192> "\"hi" +<241> +<242> /! +!/ +<241> + +<218> 44 +<241> +<194> .44 +<241> + +<194> 44. +<241> +<218> 44 +<241> + +<218> 44 +<241> +<46> . +<241> +<218> 44 +<241> + +<194> 44.44 +<241> + +<195> _hithere22 +ACCEPT +<193> '\'' +<192> "\n\d'\"" +<241> + +<195> hi +<241> + +<218> 99 +<241> + +<194> .99 +<241> + +<194> 99e-4 +<241> + +<214> ->* +<241> + +<226> || +<241> + +<220> 0x98 +<241> + +<218> 0 +<195> x +<241> + +<242> // + +<242> /! * !/ +ACCEPT +FAIL +_____OUTPUT_____ */ diff --git a/test/element1.rl b/test/element1.rl new file mode 100644 index 0000000..0795778 --- /dev/null +++ b/test/element1.rl @@ -0,0 +1,108 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +using namespace std; + +struct LangEl +{ + int key; + const char *name; +}; + +struct Fsm +{ + int cs; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + int init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. 
+ int execute( LangEl *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); + +}; + +%%{ + machine Fsm; + + alphtype int; + getkey fpc->key; + variable eof eof_marker; + + action a1 {} + action a2 {} + action a3 {} + + main := ( 1 2* 3 ) + ${cout << fpc->name << endl;} + %/{cout << "accept" << endl;}; +}%% + +%% write data; + +int Fsm::init( ) +{ + %% write init; + return 0; +} + +int Fsm::execute( LangEl *data, int len ) +{ + LangEl *p = data; + LangEl *pe = data + len; + LangEl *eof_marker = pe; + %% write exec; + + if ( cs == Fsm_error ) + return -1; + if ( cs >= Fsm_first_final ) + return 1; + return 0; +} + +int Fsm::finish( ) +{ + if ( cs == Fsm_error ) + return -1; + if ( cs >= Fsm_first_final ) + return 1; + return 0; +} + +int main( ) +{ + static Fsm fsm; + static LangEl lel[] = { + {1, "one"}, + {2, "two-a"}, + {2, "two-b"}, + {2, "two-c"}, + {3, "three"} + }; + + fsm.init(); + fsm.execute( lel, 5 ); + fsm.finish(); + return 0; +} + +#ifdef _____OUTPUT_____ +one +two-a +two-b +two-c +three +accept +#endif diff --git a/test/element2.rl b/test/element2.rl new file mode 100644 index 0000000..7aa6217 --- /dev/null +++ b/test/element2.rl @@ -0,0 +1,83 @@ +/* + * @LANG: c + */ + +#include <stdio.h> + +struct LangEl +{ + int key; + char *name; +}; + +struct fsm +{ + int cs; +}; + +%%{ + machine fsm; + alphtype int; + getkey fpc->key; + variable cs fsm->cs; + + action a1 {} + action a2 {} + action a3 {} + + main := ( 1 2* 3 ) + ${printf("%s\n", fpc->name);} + %/{printf("accept\n");}; +}%% + +%% write data; + +void fsm_init( struct fsm *fsm ) +{ + %% write init; +} + +void fsm_execute( struct fsm *fsm, struct LangEl *_data, int _len ) +{ + struct LangEl *p = _data; + struct LangEl *pe = _data+_len; + struct LangEl *eof = pe; + + %% write 
exec; +} + +int fsm_finish( struct fsm *fsm ) +{ + if ( fsm->cs == fsm_error ) + return -1; + if ( fsm->cs >= fsm_first_final ) + return 1; + return 0; +} + +int main() +{ + static struct fsm fsm; + static struct LangEl lel[] = { + {1, "one"}, + {2, "two-a"}, + {2, "two-b"}, + {2, "two-c"}, + {3, "three"} + }; + + fsm_init( &fsm ); + fsm_execute( &fsm, lel, 5 ); + fsm_finish( &fsm ); + + return 0; +} + +#ifdef _____OUTPUT_____ +one +two-a +two-b +two-c +three +accept +#endif diff --git a/test/element3.rl b/test/element3.rl new file mode 100644 index 0000000..66435f4 --- /dev/null +++ b/test/element3.rl @@ -0,0 +1,144 @@ +/* + * @LANG: obj-c + */ + +#include <stdio.h> +#include <objc/Object.h> + +struct LangEl +{ + int key; + char *name; +}; + +@interface Fsm : Object +{ +@public + int cs; +}; + +// Initialize the machine. Invokes any init statement blocks. Returns 0 +// if the machine begins in a non-accepting state and 1 if the machine +// begins in an accepting state. +- (int) initFsm; + +// Execute the machine on a block of data. Returns -1 if after processing +// the data, the machine is in the error state and can never accept, 0 if +// the machine is in a non-accepting state and 1 if the machine is in an +// accepting state. +- (int) executeWithData:( struct LangEl *)data len:(int)len; + +// Indicate that there is no more data. Returns -1 if the machine finishes +// in the error state and does not accept, 0 if the machine finishes +// in any other non-accepting state and 1 if the machine finishes in an +// accepting state. 
+- (int) finish; + +@end; + + +@implementation Fsm + +%%{ + machine Fsm; + + alphtype int; + getkey fpc->key; + + action a1 {} + action a2 {} + action a3 {} + + main := ( 1 2* 3 ) + ${printf("%s\n", fpc->name);} + %/{printf("accept\n");}; +}%% + +%% write data; + +- (int) initFsm; +{ + %% write init; + return 0; +} + +- (int) executeWithData:( struct LangEl *)_data len:(int)_len; +{ + struct LangEl *p = _data; + struct LangEl *pe = _data + _len; + struct LangEl *eof = pe; + %% write exec; + + if ( self->cs == Fsm_error ) + return -1; + return ( self->cs >= Fsm_first_final ) ? 1 : 0; +} + +- (int) finish; +{ + if ( self->cs == Fsm_error ) + return -1; + return ( self->cs >= Fsm_first_final ) ? 1 : 0; +} + + +@end + +int main() +{ + static Fsm *fsm; + static struct LangEl lel[] = { + {1, "one"}, + {2, "two-a"}, + {2, "two-b"}, + {2, "two-c"}, + {3, "three"} + }; + + fsm = [[Fsm alloc] init]; + [fsm initFsm]; + [fsm executeWithData:lel len:5]; + [fsm finish]; + + return 0; +} + +@interface Fsm2 : Object +{ + // The current state may be read and written to from outside of the + // machine. From within action code, curs is -1 and writing to it has no + // effect. + @public + int cs; + + @protected + +} + +// Execute the machine on a block of data. Returns -1 if after processing +// the data, the machine is in the error state and can never accept, 0 if +// the machine is in a non-accepting state and 1 if the machine is in an +// accepting state. 
+- (int) +executeWithElements:(int) elements +length:(unsigned)length; + +@end + +@implementation Fsm2 +- (int) +executeWithElements:(int)elements +length:(unsigned)length; +{ + return 0; +} +@end + +#ifdef _____OUTPUT_____ +one +two-a +two-b +two-c +three +accept +#endif diff --git a/test/eofact.h b/test/eofact.h new file mode 100644 index 0000000..d547f87 --- /dev/null +++ b/test/eofact.h @@ -0,0 +1,9 @@ +#ifndef _EOFACT_H +#define _EOFACT_H + +struct eofact +{ + int cs; +}; + +#endif diff --git a/test/eofact.rl b/test/eofact.rl new file mode 100644 index 0000000..eeb91b8 --- /dev/null +++ b/test/eofact.rl @@ -0,0 +1,51 @@ +/* + * @LANG: indep + * + * Test works with split code gen. + */ +%% +%%{ + machine eofact; + + action a1 { prints "a1\n"; } + action a2 { prints "a2\n"; } + action a3 { prints "a3\n"; } + action a4 { prints "a4\n"; } + + + main := ( + 'hello' @eof a1 %eof a2 '\n'? | + 'there' @eof a3 %eof a4 + ); + +}%% +/* _____INPUT_____ +"" +"h" +"hell" +"hello" +"hello\n" +"t" +"ther" +"there" +"friend" +_____INPUT_____ */ +/* _____OUTPUT_____ +a1 +a3 +FAIL +a1 +FAIL +a1 +FAIL +a2 +ACCEPT +ACCEPT +a3 +FAIL +a3 +FAIL +a4 +ACCEPT +FAIL +_____OUTPUT_____ */ diff --git a/test/erract1.rl b/test/erract1.rl new file mode 100644 index 0000000..d5c01ea --- /dev/null +++ b/test/erract1.rl @@ -0,0 +1,145 @@ +/* + * @LANG: c++ + */ + +/* + * Test error actions. + */ + +#include <iostream> +#include <stdio.h> +#include <string.h> + +using namespace std; + +struct ErrAct +{ + int cs; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + int init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. 
+ int execute( const char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +%%{ + machine ErrAct; + + action expect_digit_plus_minus { printf(" DIGIT PLUS MINUS\n"); } + action expect_digit { printf(" DIGIT\n"); } + action expect_digit_decimal { printf(" DIGIT DECIMAL\n"); } + + float = ( + ( + [\-+] >err expect_digit_plus_minus %err expect_digit | + "" + ) + ( [0-9] [0-9]* $err expect_digit_decimal ) + ( '.' [0-9]+ $err expect_digit )? + ); + + main := float '\n'; +}%% + +%% write data; + +int ErrAct::init( ) +{ + %% write init; + return 0; +} + +int ErrAct::execute( const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + const char *eof = pe; + %% write exec; + + if ( cs == ErrAct_error ) + return -1; + if ( cs >= ErrAct_first_final ) + return 1; + return 0; +} + +int ErrAct::finish( ) +{ + if ( cs == ErrAct_error ) + return -1; + if ( cs >= ErrAct_first_final ) + return 1; + return 0; +} + +#define BUFSIZE 1024 + +void test( const char *buf ) +{ + ErrAct errAct; + errAct.init(); + errAct.execute( buf, strlen(buf) ); + if ( errAct.finish() > 0 ) + cout << "ACCEPT" << endl; + else + cout << "FAIL" << endl; +} + +int main() +{ + test( "1\n" ); + test( "+1\n" ); + test( "-1\n" ); + test( "1.1\n" ); + test( "+1.1\n" ); + test( "-1.1\n" ); + test( "a\n" ); + test( "-\n" ); + test( "+\n" ); + test( "-a\n" ); + test( "+b\n" ); + test( "1.\n" ); + test( "1d\n" ); + test( "1.d\n" ); + test( "1.1d\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +ACCEPT +ACCEPT +ACCEPT +ACCEPT + DIGIT PLUS MINUS +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT DECIMAL +FAIL + DIGIT +FAIL + DIGIT +FAIL +#endif diff --git a/test/erract2.rl b/test/erract2.rl new file mode 
100644 index 0000000..b1fbfbf --- /dev/null +++ b/test/erract2.rl @@ -0,0 +1,92 @@ +/* + * @LANG: indep + * + * Test error actions. + */ +%% +%%{ + machine ErrAct; + + action err_start { prints "err_start\n"; } + action err_all { prints "err_all\n"; } + action err_middle { prints "err_middle\n"; } + action err_out { prints "err_out\n"; } + + action eof_start { prints "eof_start\n"; } + action eof_all { prints "eof_all\n"; } + action eof_middle { prints "eof_middle\n"; } + action eof_out { prints "eof_out\n"; } + + main := ( 'hello' + >err err_start $err err_all <>err err_middle %err err_out + >eof eof_start $eof eof_all <>eof eof_middle %eof eof_out + ) '\n'; +}%% + +/* _____INPUT_____ +"" +"h" +"x" +"he" +"hx" +"hel" +"hex" +"hell" +"helx" +"hello" +"hellx" +"hello\n" +"hellox" +_____INPUT_____ */ + +/* _____OUTPUT_____ +err_start +eof_start +err_all +eof_all +FAIL +err_all +err_middle +eof_all +eof_middle +FAIL +err_start +err_all +FAIL +err_all +err_middle +eof_all +eof_middle +FAIL +err_all +err_middle +FAIL +err_all +err_middle +eof_all +eof_middle +FAIL +err_all +err_middle +FAIL +err_all +err_middle +eof_all +eof_middle +FAIL +err_all +err_middle +FAIL +err_all +err_out +eof_all +eof_out +FAIL +err_all +err_middle +FAIL +ACCEPT +err_all +err_out +FAIL +_____OUTPUT_____ */ diff --git a/test/erract3.rl b/test/erract3.rl new file mode 100644 index 0000000..adfe76c --- /dev/null +++ b/test/erract3.rl @@ -0,0 +1,104 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#define IDENT_BUFLEN 256 + +struct erract +{ + int cs; +}; + +%%{ + machine erract; + variable cs fsm->cs; + + # The data that is to go into the fsm structure. 
+ action hello_fails { printf("hello fails\n");} + + newline = ( any | '\n' @{printf("newline\n");} )*; + hello = 'hello\n'* $lerr hello_fails @eof hello_fails; + main := newline | hello; +}%% + +%% write data; + +void erract_init( struct erract *fsm ) +{ + %% write init; +} + +void erract_execute( struct erract *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + const char *eof = pe; + %% write exec; +} + +int erract_finish( struct erract *fsm ) +{ + if ( fsm->cs == erract_error ) + return -1; + else if ( fsm->cs >= erract_first_final ) + return 1; + return 0; +} + +#include <stdio.h> +#include <string.h> + +struct erract fsm; + +void test( char *buf ) +{ + int len = strlen(buf); + erract_init( &fsm ); + erract_execute( &fsm, buf, len ); + if ( erract_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( + "hello\n" + "hello\n" + "hello\n" + ); + + test( + "hello\n" + "hello\n" + "hello there\n" + ); + + test( + "hello\n" + "hello\n" + "he" ); + + test( "" ); + + return 0; +} + +#ifdef _____OUTPUT_____ +newline +newline +newline +ACCEPT +newline +newline +hello fails +newline +ACCEPT +newline +newline +hello fails +ACCEPT +ACCEPT +#endif diff --git a/test/erract4.rl b/test/erract4.rl new file mode 100644 index 0000000..bef1139 --- /dev/null +++ b/test/erract4.rl @@ -0,0 +1,135 @@ +/* + * @LANG: obj-c + */ + +#include <stdio.h> +#include <objc/Object.h> + +#define IDENT_BUFLEN 256 + +@interface ErrAct : Object +{ +@public + int cs; +}; + +// Initialize the machine. Invokes any init statement blocks. Returns 0 +// if the machine begins in a non-accepting state and 1 if the machine +// begins in an accepting state. +- (int) initFsm; + +// Execute the machine on a block of data. Returns -1 if after processing +// the data, the machine is in the error state and can never accept, 0 if +// the machine is in a non-accepting state and 1 if the machine is in an +// accepting state. 
+- (void) executeWithData:(const char *)data len:(int)len; + +// Indicate that there is no more data. Returns -1 if the machine finishes +// in the error state and does not accept, 0 if the machine finishes +// in any other non-accepting state and 1 if the machine finishes in an +// accepting state. +- (int) finish; + +@end + +@implementation ErrAct + +%%{ + machine ErrAct; + + # The data that is to go into the fsm structure. + action hello_fails { printf("hello fails\n");} + + newline = ( any | '\n' @{printf("newline\n");} )*; + hello = 'hello\n'* $^hello_fails @/hello_fails; + main := newline | hello; +}%% + +%% write data; + +- (int) initFsm; +{ + %% write init; + return 1; +} + +- (void) executeWithData:(const char *)_data len:(int)_len; +{ + const char *p = _data; + const char *pe = _data + _len; + const char *eof = pe; + %% write exec; +} + +- (int) finish; +{ + if ( cs == ErrAct_error ) + return -1; + else if ( cs >= ErrAct_first_final ) + return 1; + return 0; +} + +@end + +#include <stdio.h> +#include <string.h> +#define BUFSIZE 2048 + +ErrAct *fsm; +char buf[BUFSIZE]; + +void test( char *buf ) +{ + int len = strlen(buf); + fsm = [[ErrAct alloc] init]; + + [fsm initFsm]; + [fsm executeWithData:buf len:len]; + if ( [fsm finish] > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( + "hello\n" + "hello\n" + "hello\n" + ); + + test( + "hello\n" + "hello\n" + "hello there\n" + ); + + test( + "hello\n" + "hello\n" + "he" ); + + test( "" ); + + return 0; +} + +#ifdef _____OUTPUT_____ +newline +newline +newline +ACCEPT +newline +newline +hello fails +newline +ACCEPT +newline +newline +hello fails +ACCEPT +ACCEPT +#endif diff --git a/test/erract5.rl b/test/erract5.rl new file mode 100644 index 0000000..0ea6e9a --- /dev/null +++ b/test/erract5.rl @@ -0,0 +1,146 @@ +/* + * @LANG: obj-c + */ + +/* + * Test error actions. 
+ */ + +#include <stdio.h> +#include <string.h> +#include <objc/Object.h> + + +@interface ErrAct : Object +{ +@public + int cs; +}; + +// Initialize the machine. Invokes any init statement blocks. Returns 0 +// if the machine begins in a non-accepting state and 1 if the machine +// begins in an accepting state. +- (int) initFsm; + +// Execute the machine on a block of data. Returns -1 if after processing +// the data, the machine is in the error state and can never accept, 0 if +// the machine is in a non-accepting state and 1 if the machine is in an +// accepting state. +- (void) executeWithData:(const char *)data len:(int)len; + +// Indicate that there is no more data. Returns -1 if the machine finishes +// in the error state and does not accept, 0 if the machine finishes +// in any other non-accepting state and 1 if the machine finishes in an +// accepting state. +- (int) finish; + +@end + +@implementation ErrAct + +%%{ + machine ErrAct; + + action expect_digit_plus_minus { printf(" DIGIT PLUS MINUS\n"); } + action expect_digit { printf(" DIGIT\n"); } + action expect_digit_decimal { printf(" DIGIT DECIMAL\n"); } + + float = ( + ( + [\-+] >!expect_digit_plus_minus %!expect_digit | + "" + ) + ( [0-9] [0-9]* $!expect_digit_decimal ) + ( '.' [0-9]+ $!expect_digit )? 
+ ); + + main := float '\n'; +}%% + +%% write data; + +- (int) initFsm; +{ + %% write init; + return 1; +} + +- (void) executeWithData:(const char *)_data len:(int)_len; +{ + const char *p = _data; + const char *pe = _data + _len; + const char *eof = pe; + %% write exec; +} + +- (int) finish; +{ + if ( cs == ErrAct_error ) + return -1; + else if ( cs >= ErrAct_first_final ) + return 1; + return 0; +} + + +@end + +#define BUFSIZE 1024 + +void test( char *buf ) +{ + ErrAct *errAct = [[ErrAct alloc] init]; + [errAct initFsm]; + [errAct executeWithData:buf len:strlen(buf)]; + if ( [errAct finish] > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( "1\n" ); + test( "+1\n" ); + test( "-1\n" ); + test( "1.1\n" ); + test( "+1.1\n" ); + test( "-1.1\n" ); + test( "a\n" ); + test( "-\n" ); + test( "+\n" ); + test( "-a\n" ); + test( "+b\n" ); + test( "1.\n" ); + test( "1d\n" ); + test( "1.d\n" ); + test( "1.1d\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +ACCEPT +ACCEPT +ACCEPT +ACCEPT + DIGIT PLUS MINUS +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT +FAIL + DIGIT DECIMAL +FAIL + DIGIT +FAIL + DIGIT +FAIL +#endif diff --git a/test/erract6.rl b/test/erract6.rl new file mode 100644 index 0000000..688042f --- /dev/null +++ b/test/erract6.rl @@ -0,0 +1,82 @@ +/* + * @LANG: c + */ + +/* + * Test of a transition going to the error state. 
+ */ + +#include <stdio.h> +#define BUFSIZE 2048 + +struct errintrans +{ + int cs; +}; + +%%{ + machine errintrans; + variable cs fsm->cs; + + char = any - (digit | '\n'); + line = char* "\n"; + main := line+; +}%% + +%% write data; + +void errintrans_init( struct errintrans *fsm ) +{ + %% write init; +} + +void errintrans_execute( struct errintrans *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int errintrans_finish( struct errintrans *fsm ) +{ + if ( fsm->cs == errintrans_error ) + return -1; + if ( fsm->cs >= errintrans_first_final ) + return 1; + return 0; +} + + +struct errintrans fsm; +#include <string.h> + +void test( char *buf ) +{ + int len = strlen( buf ); + errintrans_init( &fsm ); + errintrans_execute( &fsm, buf, len ); + if ( errintrans_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( + "good, does not have numbers\n" + ); + + test( + "bad, has numbers 666\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +FAIL +#endif diff --git a/test/erract7.rl b/test/erract7.rl new file mode 100644 index 0000000..040ad73 --- /dev/null +++ b/test/erract7.rl @@ -0,0 +1,42 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +%%{ + machine foo; + + action on_char { printf("char: %c\n", *p); } + action on_err { printf("err: %c\n", *p); } + action to_state { printf("to state: %c\n", *p); } + + main := 'heXXX' $on_char $err(on_err) $to(to_state); +}%% + +%% write data; + +int main() +{ + int cs; + char *p = "hello", *pe = p + strlen(p); + char *eof = pe; + %%{ + write init; + write exec; + }%% + + printf( "rest: %s\n", p ); + + return 0; +} + +#ifdef _____OUTPUT_____ +char: h +to state: h +char: e +to state: e +err: l +rest: llo +#endif diff --git a/test/erract8.rl b/test/erract8.rl new file mode 100644 index 0000000..7926186 --- /dev/null +++ b/test/erract8.rl @@ -0,0 +1,44 @@ +/* + * @LANG: java + */ + +class erract8 +{ 
+ %%{ + machine erract8; + + action on_char { System.out.println("char: " + data[p]); } + action on_err { System.out.println("err: " + data[p]); } + action to_state { System.out.println("to state: " + data[p]); } + + main := 'heXXX' $on_char $err(on_err) $to(to_state); + }%% + + %% write data; + + static void test( char data[] ) + { + int cs, p = 0, pe = data.length; + int eof = pe; + int top; + + %% write init; + %% write exec; + + System.out.println("rest: " + data[p] + data[p+1] + data[p+2]); + } + + public static void main( String args[] ) + { + test( "hello".toCharArray() ); + } +} + +/* _____OUTPUT_____ +char: h +to state: h +char: e +to state: e +err: l +rest: llo +*/ diff --git a/test/erract9.rl b/test/erract9.rl new file mode 100644 index 0000000..ccd848a --- /dev/null +++ b/test/erract9.rl @@ -0,0 +1,43 @@ +# +# @LANG: ruby +# +# Test the host language scanning for ruby. +# + +%%{ + machine erract9; + + action on_char { print("char: ", data[p..p], "\n"); } + action on_err { print("err: ", data[p..p], "\n"); } + action to_state { print("to state: " , data[p..p], "\n"); } + + main := 'heXXX' $on_char $err(on_err) $to(to_state); +}%% + +%% write data; + +def run_machine( data ) + p = 0; + pe = data.length + cs = 0 + + %% write init; + %% write exec; + + print("rest: " , data[p..p+2], "\n") +end + +inp = [ + "hello\n", +] + +inp.each { |str| run_machine(str) } + +=begin _____OUTPUT_____ +char: h +to state: h +char: e +to state: e +err: l +rest: llo +=end _____OUTPUT_____ diff --git a/test/export1.rl b/test/export1.rl new file mode 100644 index 0000000..fe96141 --- /dev/null +++ b/test/export1.rl @@ -0,0 +1,59 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +%%{ + machine test; + + export c1 = 'c'; + export c2 = 'z'; + export c3 = 't'; + + commands := ( + c1 . digit* '\n' @{ printf( "c1\n" );} | + c2 . alpha* '\n' @{ printf( "c2\n" );}| + c3 . 
'.'* '\n' @{ printf( "c3\n" );} + )*; + + some_other := any*; +}%% + +%% write exports; +%% write data; + +int test( const char *data, int len ) +{ + int cs = test_en_commands; + const char *p = data, *pe = data + len; + + %% write init nocs; + %% write exec; + + if ( cs >= test_first_final ) + printf("ACCEPT\n"); + else + printf("ERROR\n"); + return 0; +} + +char data[] = { + test_ex_c1, '1', '2', '\n', + test_ex_c2, 'a', 'b', '\n', + test_ex_c3, '.', '.', '\n', 0 +}; + +int main() +{ + test( data, strlen( data ) ); + return 0; +} + +#ifdef _____OUTPUT_____ +c1 +c2 +c3 +ACCEPT +#endif diff --git a/test/export2.rl b/test/export2.rl new file mode 100644 index 0000000..881a4c9 --- /dev/null +++ b/test/export2.rl @@ -0,0 +1,57 @@ +/* + * @LANG: java + */ + +class export2 +{ + %%{ + machine test; + + export c1 = 'c'; + export c2 = 'z'; + export c3 = 't'; + + commands := ( + c1 . digit* '\n' @{ System.out.println( "c1" );} | + c2 . alpha* '\n' @{ System.out.println( "c2" );}| + c3 . '.'* '\n' @{ System.out.println( "c3" );} + )*; + + other := any*; + }%% + + %% write exports; + %% write data; + + static void test( char data[] ) + { + int cs = test_en_commands, p = 0, pe = data.length; + int top; + + %% write init nocs; + %% write exec; + + if ( cs >= test_first_final ) + System.out.println( "ACCEPT" ); + else + System.out.println( "FAIL" ); + } + + public static void main( String args[] ) + { + char data[] = { + test_ex_c1, '1', '2', '\n', + test_ex_c2, 'a', 'b', '\n', + test_ex_c3, '.', '.', '\n', + }; + test( data ); + } +} + + +/* _____OUTPUT_____ +c1 +c2 +c3 +ACCEPT +*/ diff --git a/test/export3.rl b/test/export3.rl new file mode 100644 index 0000000..dbf74b8 --- /dev/null +++ b/test/export3.rl @@ -0,0 +1,53 @@ +# +# @LANG: ruby +# + +%%{ + machine test; + + export c1 = 'c'; + export c2 = 'z'; + export c3 = 't'; + + commands := ( + c1 . digit* '\n' @{ puts "c1"; } | + c2 . alpha* '\n' @{ puts "c2"; }| + c3 . 
'.'* '\n' @{ puts "c3"; } + )*; + + other := any*; +}%% + +%% write exports; +%% write data; + +def run_machine( data ) + p = 0; + pe = data.length + cs = test_en_commands + val = 0; + neg = false; + + %% write init nocs; + %% write exec; + if cs >= test_first_final + puts "ACCEPT" + else + puts "FAIL" + end +end + +inp = [ + test_ex_c1, ?1, ?2, ?\n, + test_ex_c2, ?a, ?b, ?\n, + test_ex_c3, ?., ?., ?\n +] + +run_machine( inp ); + +=begin _____OUTPUT_____ +c1 +c2 +c3 +ACCEPT +=end _____OUTPUT_____ diff --git a/test/export4.rl b/test/export4.rl new file mode 100644 index 0000000..94d50e4 --- /dev/null +++ b/test/export4.rl @@ -0,0 +1,59 @@ +/* + * @LANG: d + */ + +import std.c.stdio; +import std.string; + +%%{ + machine test; + + export c1 = 'c'; + export c2 = 'z'; + export c3 = 't'; + + commands := ( + c1 . digit* '\n' @{ printf( "c1\n" );} | + c2 . alpha* '\n' @{ printf( "c2\n" );}| + c3 . '.'* '\n' @{ printf( "c3\n" );} + )*; + + some_other := any*; +}%% + +%% write exports; +%% write data; + +int test( char data[] ) +{ + int cs = test_en_commands; + char *p = data.ptr, pe = data.ptr + data.length; + + %% write init nocs; + %% write exec; + + if ( cs >= test_first_final ) + printf("ACCEPT\n"); + else + printf("ERROR\n"); + return 0; +} + +char data[] = [ + test_ex_c1, '1', '2', '\n', + test_ex_c2, 'a', 'b', '\n', + test_ex_c3, '.', '.', '\n' +]; + +int main() +{ + test( data ); + return 0; +} + +/+ _____OUTPUT_____ +c1 +c2 +c3 +ACCEPT +++++++++++++++++++/ diff --git a/test/fnext1.rl b/test/fnext1.rl new file mode 100644 index 0000000..a2925eb --- /dev/null +++ b/test/fnext1.rl @@ -0,0 +1,81 @@ +/* + * @LANG: c + * + * Tests fnext in combination with fbreak. 
+ */ + +#include <string.h> +#include <stdio.h> + +char comm; +int top; +int stack [32]; + +%%{ + machine fnext; + action break {fbreak;} + + main := 'h' @{ /*h*/ fnext e; fbreak; }; + e := 'e' @{ /*e*/ fnext l; } @{ fbreak; }; + l := 'll' @{ /*ll*/ fnext o; } ${ fbreak; }; + o := |* 'o' { /*o*/ fnext nl; fbreak; }; *|; + nl := '\n' @{ /*nl*/ fbreak; printf("ACCEPT\n"); }; +}%% + +int cs; +char *ts, *te; +int act; + +%% write data; + +void init() +{ + %% write init; +} + +void exec( char *data, int len ) +{ + char *p = data; + char *pe = data + len; + + while ( cs != fnext_error && p < pe ) { + printf( "%c\n", *p ); + %% write exec; + } +} + +void finish( ) +{ + if ( cs >= fnext_first_final ) + printf( "ACCEPT\n" ); + else + printf( "FAIL\n" ); +} + +char *inp[] = { + "hello\n" +}; + +int inplen = 1; + +int main( ) +{ + int i; + for ( i = 0; i < inplen; i++ ) { + init(); + exec( inp[i], strlen(inp[i]) ); + finish(); + } + return 0; +} + +#ifdef _____OUTPUT_____ +h +e +l +l +o + + +ACCEPT +#endif diff --git a/test/forder1.rl b/test/forder1.rl new file mode 100644 index 0000000..9873af4 --- /dev/null +++ b/test/forder1.rl @@ -0,0 +1,98 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +struct forder +{ + int cs; +}; + +%%{ + machine forder; + variable cs fsm->cs; + + second = 'b' + >{printf("enter b1\n");} + >{printf("enter b2\n");} + ; + + first = 'a' + %{printf("leave a\n");} + @{printf("finish a\n");} + ; + + main := first . second . 
'\n'; +}%% + +%% write data; + +void forder_init( struct forder *fsm ) +{ + %% write init; +} + +void forder_execute( struct forder *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int forder_finish( struct forder *fsm ) +{ + if ( fsm->cs == forder_error ) + return -1; + if ( fsm->cs >= forder_first_final ) + return 1; + return 0; +} + +struct forder fsm; + +void test( char *buf ) +{ + int len = strlen(buf); + forder_init( &fsm ); + forder_execute( &fsm, buf, len ); + if ( forder_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( "ab\n"); + test( "abx\n"); + test( "" ); + + test( + "ab\n" + "fail after newline\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +finish a +leave a +enter b1 +enter b2 +ACCEPT +finish a +leave a +enter b1 +enter b2 +FAIL +FAIL +finish a +leave a +enter b1 +enter b2 +FAIL +#endif diff --git a/test/forder2.rl b/test/forder2.rl new file mode 100644 index 0000000..d92f888 --- /dev/null +++ b/test/forder2.rl @@ -0,0 +1,133 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +/* + * After the fact start and ending transitions. Behaves like constructors of + * and destructors in c++. + */ + +struct forder +{ + int cs; +}; + +%%{ + machine forder; + variable cs fsm->cs; + + inner = 'inner' + >{printf("enter inner\n");} + ${printf("inside inner\n");} + %{printf("leave inner\n");} + ; + + outter = inner + >{printf("enter outter\n");} + ${printf("inside outter\n");} + %{printf("leave outter\n");} + ; + + main := outter . 
'\n'; +}%% + +%% write data; + +void forder_init( struct forder *fsm ) +{ + %% write init; +} + +void forder_execute( struct forder *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int forder_finish( struct forder *fsm ) +{ + if ( fsm->cs == forder_error ) + return -1; + if ( fsm->cs >= forder_first_final ) + return 1; + return 0; +} + +struct forder fsm; + +void test( char *buf ) +{ + int len = strlen( buf ); + forder_init( &fsm ); + forder_execute( &fsm, buf, len ); + if ( forder_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( "inner\n"); + + test( + "inner\n" + "foobar\n" + ); + + test( "" ); + test( "\n" ); + test( "inn\n" ); + + return 0; +} + +#ifdef _____OUTPUT_____ +enter outter +enter inner +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +leave inner +leave outter +ACCEPT +enter outter +enter inner +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +leave inner +leave outter +FAIL +FAIL +FAIL +enter outter +enter inner +inside inner +inside outter +inside inner +inside outter +inside inner +inside outter +FAIL +#endif diff --git a/test/forder3.rl b/test/forder3.rl new file mode 100644 index 0000000..5cb3725 --- /dev/null +++ b/test/forder3.rl @@ -0,0 +1,107 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +struct forder +{ + int cs; +}; + +%%{ + machine forder; + variable cs fsm->cs; + + m1 = ( "" %{printf("enter m1 aa\n");} | + 'aa'* >{printf("enter m1 aa\n");} %{printf("leave m1 aa\n");} ) + 'b' @{printf("through m1 b\n");} . 'b'* . 'a'*; + + m2 = 'bbb'* 'aa'*; + + main := ( + m1 %{printf("accept m1\n");} | + "" %{printf("enter m2\n");} | + m2 >{printf("enter m2\n");} %{printf("accept m2\n");} + ) . 
'\n'; +}%% + +%% write data; + +void forder_init( struct forder *fsm ) +{ + %% write init; +} + +void forder_execute( struct forder *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int forder_finish( struct forder *fsm ) +{ + if ( fsm->cs == forder_error ) + return -1; + if ( fsm->cs >= forder_first_final ) + return 1; + return 0; +} + +struct forder fsm; + +void test( char *buf ) +{ + int len = strlen( buf ); + forder_init( &fsm ); + forder_execute( &fsm, buf, len ); + if ( forder_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( "aaaaaabbbaa\n" ); + test( "\n" ); + test( "bbbbbbaaaaaaa\n" ); + test( "bbbbbbaaaaaa\n" ); + test( "aaaaa\n" ); + + return 0; +} + +#ifdef _____OUTPUT_____ +enter m1 aa +enter m2 +leave m1 aa +through m1 b +accept m1 +ACCEPT +enter m2 +enter m2 +accept m2 +ACCEPT +enter m1 aa +enter m1 aa +leave m1 aa +through m1 b +enter m2 +accept m1 +ACCEPT +enter m1 aa +enter m1 aa +leave m1 aa +through m1 b +enter m2 +accept m1 +accept m2 +ACCEPT +enter m1 aa +enter m2 +FAIL +#endif diff --git a/test/gotocallret1.rl b/test/gotocallret1.rl new file mode 100644 index 0000000..8b294dd --- /dev/null +++ b/test/gotocallret1.rl @@ -0,0 +1,114 @@ +/* + * @LANG: indep + */ + +/* + * Demonstrate the use of goto, call and return. This machine expects either a + * lower case char or a digit as a command then a space followed by the command + * arg. If the command is a char, then the arg must be an a string of chars. + * If the command is a digit, then the arg must be a string of digits. This + * choice is determined by action code, rather than though transition + * desitinations. + */ + +char comm; +int top; +int stack[32]; +%% +%%{ + machine GotoCallRet; + + # A reference to a state in an unused action caused a segfault in 5.8. 
*/ + action unusedAction { fentry(garble_line); } + + action err_garbling_line { prints "error: garbling line\n"; } + action goto_main { fgoto main; } + action recovery_failed { prints "error: failed to recover\n"; } + + # Error machine, consumes to end of + # line, then starts the main line over. + garble_line := ( (any-'\n')*'\n') + >err_garbling_line + @goto_main + $/recovery_failed; + + action hold_and_return {fhold; fret;} + + # Look for a string of alphas or of digits, + # on anything else, hold the character and return. + alp_comm := alpha+ $!hold_and_return; + dig_comm := digit+ $!hold_and_return; + + # Choose which to machine to call into based on the command. + action comm_arg { + if ( comm >= 'a' ) + fcall alp_comm; + else + fcall dig_comm; + } + + # Specifies command string. Note that the arg is left out. + command = ( + [a-z0-9] @{comm = fc;} ' ' @comm_arg '\n' + ) @{prints "correct command\n";}; + + # Any number of commands. If there is an + # error anywhere, garble the line. 
+ main := command* $!{fhold;fgoto garble_line;}; +}%% +/* _____INPUT_____ +"lkajsdf\n" +"2134\n" +"(\n" +"\n" +"*234234()0909 092 -234aslkf09`1 11\n" +"1\n" +"909\n" +"1 a\n" +"11 1\n" +"a 1\n" +"aa a\n" +"1 1\n" +"1 123456\n" +"a a\n" +"a abcdef\n" +"h" +"a aa1" +_____INPUT_____ */ +/* _____OUTPUT_____ +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +error: garbling line +ACCEPT +correct command +ACCEPT +correct command +ACCEPT +correct command +ACCEPT +correct command +ACCEPT +error: failed to recover +FAIL +error: garbling line +error: failed to recover +FAIL +_____OUTPUT_____ */ diff --git a/test/gotocallret2.rl b/test/gotocallret2.rl new file mode 100644 index 0000000..5b4f740 --- /dev/null +++ b/test/gotocallret2.rl @@ -0,0 +1,77 @@ +/* + * @LANG: indep + */ + +char comm; +int top; +int stack[32]; +ptr ts; +ptr te; +int act; +int val; +%% +%%{ + machine GotoCallRet; + + sp = ' '; + + handle := any @{ + prints "handle "; + fhold; + if ( val == 1 ) fnext *fentry(one); + if ( val == 2 ) fnext *fentry(two); + if ( val == 3 ) fnext main; + }; + + one := |* + '{' => { prints "{ "; fcall *fentry(one); }; + "[" => { prints "[ "; fcall *fentry(two); }; + "}" sp* => { prints "} "; fret; }; + [a-z]+ => { prints "word "; val = 1; fgoto *fentry(handle); }; + ' ' => { prints "space "; }; + *|; + + two := |* + '{' => { prints "{ "; fcall *fentry(one); }; + "[" => { prints "[ "; fcall *fentry(two); }; + ']' sp* => { prints "] "; fret; }; + [a-z]+ => { prints "word "; val = 2; fgoto *fentry(handle); }; + ' ' => { prints "space "; }; + *|; + + main := |* + '{' => { prints "{ "; fcall one; }; + "[" => { prints "[ "; fcall two; }; + [a-z]+ => { prints "word "; val = 3; fgoto handle; }; + [a-z] ' foil' => { prints "this is the 
foil";}; + ' ' => { prints "space "; }; + '\n'; + *|; +}%% +/* _____INPUT_____ +"{a{b[c d]d}c}\n" +"[a{b[c d]d}c}\n" +"[a[b]c]d{ef{g{h}i}j}l\n" +"{{[]}}\n" +"a b c\n" +"{a b c}\n" +"[a b c]\n" +"{]\n" +"{{}\n" +"[[[[[[]]]]]]\n" +"[[[[[[]]}]]]\n" +_____INPUT_____ */ +/* _____OUTPUT_____ +{ word handle { word handle [ word handle space word handle ] word handle } word handle } ACCEPT +[ word handle { word handle [ word handle space word handle ] word handle } word handle FAIL +[ word handle [ word handle ] word handle ] word handle { word handle { word handle { word handle } word handle } word handle } word handle ACCEPT +{ { [ ] } } ACCEPT +word handle space word handle space word handle ACCEPT +{ word handle space word handle space word handle } ACCEPT +[ word handle space word handle space word handle ] ACCEPT +{ FAIL +{ { } FAIL +[ [ [ [ [ [ ] ] ] ] ] ] ACCEPT +[ [ [ [ [ [ ] ] FAIL +_____OUTPUT_____ */ + diff --git a/test/high1.rl b/test/high1.rl new file mode 100644 index 0000000..9179c89 --- /dev/null +++ b/test/high1.rl @@ -0,0 +1,180 @@ +/* + * @LANG: c + * @ALLOW_GENFLAGS: -T0 -T1 -G0 -G1 -G2 + */ + +/** + * Test a high character to make sure signedness + * isn't messing us up. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +struct high +{ + int cs; +}; + +%%{ + machine high; + variable cs fsm->cs; + + # We Want the header portion. + alphtype unsigned int; + + main := ( + 0x20 .. 0xefffffff @1 @{printf("gothigh1\n");} | + 0xf0000000 @1 @{printf("gothigh1\n");} | + 0x200 .. 
0xfe000000 @1 @{printf("gothigh2\n");} | + any @0 @{printf("else\n");} + )*; +}%% + +%% write data; + +void high_init( struct high *fsm ) +{ + %% write init; +} + +void high_execute( struct high *fsm, const unsigned int *_data, int _len ) +{ + const unsigned int *p = _data; + const unsigned int *pe = _data+_len; + + %% write exec; +} + +int high_finish( struct high *fsm ) +{ + if ( fsm->cs == high_error ) + return -1; + if ( fsm->cs >= high_first_final ) + return 1; + return 0; +} + +struct high high; + +#define BUFSIZE 1024 +char cbuf[BUFSIZE]; +unsigned int buf[BUFSIZE]; +int buflen = 0; +char numbuf[9]; +int numlen = 0; + +struct tokenizer +{ + int cs; +}; + +%%{ + machine tokenizer; + variable cs fsm->cs; + + action bufdigit { + if ( numlen < 8 ) + numbuf[numlen++] = fc; + } + + action writeDigit { + /* Null terminate the buffer storing the number and reset. */ + numbuf[numlen] = 0; + numlen = 0; + + /* Store the number in the buf. If the buf is full then + * flush and reset the buffer. 
*/ + buf[buflen++] = strtoul( numbuf, 0, 16 ); + if ( buflen == BUFSIZE ) { + high_execute( &high, buf, BUFSIZE ); + buflen = 0; + } + } + + action finish { + if ( buflen > 0 ) + high_execute( &high, buf, buflen ); + if ( high_finish( &high ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); + } + + num = ( digit | 'a'..'f' )+ $bufdigit %writeDigit; + main := ( num $1 %0 | space )* %/finish; +}%% + +%% write data; + +void tokenizer_init( struct tokenizer *fsm ) +{ + %% write init; +} + +void tokenizer_execute( struct tokenizer *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + const char *eof = pe; + + %% write exec; +} + +int tokenizer_finish( struct tokenizer *fsm ) +{ + if ( fsm->cs == tokenizer_error ) + return -1; + if ( fsm->cs >= tokenizer_first_final ) + return 1; + return 0; +} + +struct tokenizer tok; + +void test( char *cbuf ) +{ + int len = strlen( cbuf ); + high_init( &high ); + tokenizer_init( &tok ); + tokenizer_execute( &tok, cbuf, len ); + if ( tokenizer_finish( &tok ) <= 0 ) + printf("Tokenizer FAIL\n"); +} + +char data[] = + "10 20 30 40 50 200 300 400 \n" + "d0000000 f0000000 fd000000 fe000000\n" + "ff000000 ffffffffffffffffffffffffff\n" + "ff\n"; + +int main() +{ + test( data ); + return 0; +} + +#ifdef _____OUTPUT_____ +else +gothigh1 +gothigh1 +gothigh1 +gothigh1 +gothigh1 +gothigh2 +gothigh1 +gothigh2 +gothigh1 +gothigh2 +gothigh1 +gothigh2 +gothigh1 +gothigh2 +gothigh2 +gothigh2 +else +else +gothigh1 +ACCEPT +#endif diff --git a/test/high2.rl b/test/high2.rl new file mode 100644 index 0000000..53f1d80 --- /dev/null +++ b/test/high2.rl @@ -0,0 +1,103 @@ +/* + * @LANG: c++ + */ + +/** + * Test a high character to make sure signedness + * isn't messing us up. + */ + +#include <stdio.h> +#include <string.h> + +struct Fsm +{ + int cs; + + // Initialize the machine. Invokes any init statement blocks. 
Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + int init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + int execute( const unsigned char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +%%{ + machine Fsm; + + alphtype unsigned char; + + # Indicate we got the high character. + action gothigh { + printf("yes\n"); + } + + main := 0xe8 @gothigh '\n'; +}%% + +%% write data; + +int Fsm::init( ) +{ + %% write init; + return 0; +} + +int Fsm::execute( const unsigned char *_data, int _len ) +{ + const unsigned char *p = _data; + const unsigned char *pe = _data+_len; + %% write exec; + if ( cs == Fsm_error ) + return -1; + if ( cs >= Fsm_first_final ) + return 1; + return 0; +} + +int Fsm::finish() +{ + if ( cs == Fsm_error ) + return -1; + if ( cs >= Fsm_first_final ) + return 1; + return 0; +} + +Fsm fsm; + +void test( unsigned char *buf, int len ) +{ + fsm.init(); + fsm.execute( buf, len ); + if ( fsm.finish() > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +unsigned char data1[] = { 0xe8, 10 }; +unsigned char data2[] = { 0xf8, 10 }; + +int main() +{ + test( data1, 2 ); + test( data2, 2 ); + return 0; +} + +#ifdef _____OUTPUT_____ +yes +ACCEPT +FAIL +#endif diff --git a/test/high3.rl b/test/high3.rl new file mode 100644 index 0000000..d915bff --- /dev/null +++ b/test/high3.rl @@ -0,0 +1,111 @@ +/* + * @LANG: obj-c + */ + +/** + * Test a high character to make sure signedness + * isn't messing us up. 
+ */ + +#include <stdio.h> +#include <objc/Object.h> + +@interface Fsm : Object +{ +@public + int cs; +}; + +// Initialize the machine. Invokes any init statement blocks. Returns 0 +// if the machine begins in a non-accepting state and 1 if the machine +// begins in an accepting state. +- (int) initFsm; + +// Execute the machine on a block of data. Returns nothing; the resulting +// state is left in cs. Call finish afterwards to learn whether the machine +// is in the error state (-1), a non-accepting state (0) or an accepting +// state (1). +- (void) executeWithData:(const unsigned char *)data len:(int)len; + +// Indicate that there is no more data. Returns -1 if the machine finishes +// in the error state and does not accept, 0 if the machine finishes +// in any other non-accepting state and 1 if the machine finishes in an +// accepting state. +- (int) finish; + +@end + +@implementation Fsm + +%%{ + machine Fsm; + + alphtype unsigned char; + + # Indicate we got the high character. 
+ action gothigh { + printf("yes\n"); + } + + main := 0xe8 @gothigh '\n'; +}%% + +%% write data; + +- (int) initFsm; +{ + %% write init; + return 1; +} + +- (void) executeWithData:(const unsigned char *)_data len:(int)_len; +{ + const unsigned char *p = _data; + const unsigned char *pe = _data + _len; + %% write exec; +} + +- (int) finish; +{ + if ( cs == Fsm_error ) + return -1; + else if ( cs >= Fsm_first_final ) + return 1; + return 0; +} + + +@end + + +#define BUFSIZE 2048 + +Fsm *fsm; +unsigned char buf[BUFSIZE]; + +void test( unsigned char *buf, int len ) +{ + fsm = [[Fsm alloc] init]; + [fsm initFsm]; + [fsm executeWithData:buf len:len]; + if ( [fsm finish] > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +unsigned char data1[] = { 0xe8, 10 }; +unsigned char data2[] = { 0xf8, 10 }; + +int main() +{ + test( data1, 2 ); + test( data2, 2 ); + return 0; +} + +#ifdef _____OUTPUT_____ +yes +ACCEPT +FAIL +#endif diff --git a/test/import1.rl b/test/import1.rl new file mode 100644 index 0000000..e721f56 --- /dev/null +++ b/test/import1.rl @@ -0,0 +1,73 @@ +/* + * @LANG: c + */ + +#include <stdio.h> + +char *foo = "foo"; + +char b = 98; +char a = 97; +char r = 114; + +#define SP 32 +#define NL '\n' + +%%{ + machine tmp; + import "import1.rl"; + + foobar = + foo @{printf("foo\n"); } | + b a r @{printf("bar\n");}; + + main := ( foobar SP foobar NL )*; +}%% + +%% write data; + +int cs; + +void exec_str( char *p, int len ) +{ + char *pe = p + len; + %% write exec; +} + +void exec_c( char c ) +{ + exec_str( &c, 1 ); +} + +int main() +{ + %% write init; + + exec_str( foo, 3 ); + exec_c( SP ); + exec_c( b ); + exec_c( a ); + exec_c( r ); + exec_c( NL ); + + exec_c( b ); + exec_c( a ); + exec_c( r ); + exec_c( SP ); + exec_str( foo, 3 ); + exec_c( NL ); + + if ( cs < tmp_first_final ) + printf("FAIL\n"); + else + printf("ACCEPT\n"); + + return 0; +} +#ifdef _____OUTPUT_____ +foo +bar +bar +foo +ACCEPT +#endif diff --git a/test/include1.rl b/test/include1.rl new 
file mode 100644 index 0000000..30145de --- /dev/null +++ b/test/include1.rl @@ -0,0 +1,28 @@ +/* + * @LANG: c + * @IGNORE: yes + * + * Provides definitions for include tests. + */ + +%%{ + machine include_test_1; + + action A {printf(" a1");} + action B {printf(" b1");} + + action NonRef1 {printf(" nr1");} + + a1 = 'a' @A; + b1 = 'b' @B; +}%% + +%%{ + machine include_test_2; + + action NonRef2 {printf(" nr2");} + + a2 = 'a' @{printf(" a2");}; + b2 = 'b' @{printf(" b2");}; +}%% + diff --git a/test/include2.rl b/test/include2.rl new file mode 100644 index 0000000..68ab007 --- /dev/null +++ b/test/include2.rl @@ -0,0 +1,52 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +%%{ + machine include_test_4; + + action NonRef3 {printf(" nr3");} + + a3 = 'a'@{printf(" a3");}; + b3 = 'b'@{printf(" b3");}; + +}%% + +%%{ + machine include_test_1; + + include "include1.rl"; + + include include_test_2 "include1.rl"; + + include include_test_4; + + main := + a1 b1 @NonRef1 + a2 b2 @NonRef2 + a3 b3 @NonRef3 + 0 @{fbreak;}; +}%% + +%% write data; + +void test( char *p ) +{ + int cs; + %% write init; + %% write exec noend; + printf("\n"); +} + +int main() +{ + test( "ababab" ); + return 0; +} + +#ifdef _____OUTPUT_____ + a1 b1 nr1 a2 b2 nr2 a3 b3 nr3 +#endif diff --git a/test/java1.rl b/test/java1.rl new file mode 100644 index 0000000..461d177 --- /dev/null +++ b/test/java1.rl @@ -0,0 +1,48 @@ +/* + * @LANG: java + */ + +class java1 +{ + %%{ + machine java1; + + one := 'one\n'; + two := 'two\n'; + four := 'four\n'; + + main := + ( 'hello' | 'there' | 'friend' ) + '\n' @{int s = fentry(one); fgoto *s; char c = fc;} + ( 'one' | 'two' | 'four' ) '\n'; + }%% + + %% write data; + + static void test( char data[] ) + { + int cs, p = 0, pe = data.length; + int top; + + %% write init; + %% write exec; + + if ( cs >= java1_first_final ) + System.out.println( "ACCEPT" ); + else + System.out.println( "FAIL" ); + } + + public static void main( String args[] ) + { + test( 
"hello\none\n".toCharArray() ); + test( "there\ntwo\n".toCharArray() ); + test( "friend\nfour\n".toCharArray() ); + } +} + +/* _____OUTPUT_____ +ACCEPT +FAIL +FAIL +*/ diff --git a/test/java2.rl b/test/java2.rl new file mode 100644 index 0000000..f308902 --- /dev/null +++ b/test/java2.rl @@ -0,0 +1,50 @@ +/* + * @LANG: java + */ + +class java2 +{ + %%{ + machine java1; + alphtype int; + + main := 1 2 3 4 ( + 5 6 7 8 | + 9 10 11 12 + ) 1073741824; + + }%% + + %% write data; + + static void test( int data[] ) + { + int cs, p = 0, pe = data.length; + int top; + + %% write init; + %% write exec; + + if ( cs >= java1_first_final ) + System.out.println( "ACCEPT" ); + else + System.out.println( "FAIL" ); + } + + static final int t1[] = { 1, 2, 3, 4, 5, 6, 7, 8, 1073741824 }; + static final int t2[] = { 1, 2, 3, 4, 9, 10, 11, 12, 1073741824 }; + static final int t3[] = { 1, 2, 3, 4, 1073741824 }; + + public static void main( String args[] ) + { + test( t1 ); + test( t2 ); + test( t3 ); + } +} + +/* _____OUTPUT_____ +ACCEPT +ACCEPT +FAIL +*/ diff --git a/test/keller1.rl b/test/keller1.rl new file mode 100644 index 0000000..646cec9 --- /dev/null +++ b/test/keller1.rl @@ -0,0 +1,1075 @@ +/* + * @LANG: c++ + */ + +/* + * Automatically generated by keller. Do not edit. + * + * Parts of this file are copied from Keller source covered by the GNU + * GPL. As a special exception, you may use the parts of this file copied + * from Keller source without restriction. The remainder is derived from + * "tmp.gmr" and inherits the copyright status of that file. 
+ */ + +#line 1 "tmp.gmr" +#include <iostream> +using std::cout; +using std::endl; + + +#line 16 "tmp.rl" +enum token_type_e { + tt_id, + tt_equals, + tt_semi, + tt_pipe, + tt_amp, + tt_minus, + tt_dot, + tt_colon, + tt_percent, + tt_dollar, + tt_plus, + tt_number, + tt_star, + tt_question, + tt_not, + tt_andFSM, + tt_orFSM, + tt_open, + tt_close +}; + +struct LangEl +{ + int line, lineEnd; + int pos; + + int type; + int state; + LangEl *prev, *next; +}; + +struct Token : public LangEl +{ + const char *value; +}; + +struct Lel_start : public LangEl +{ +#line 32 "tmp.gmr" + + int si; +#line 59 "tmp.rl" +}; + +struct Lel_M : public LangEl +{ +#line 36 "tmp.gmr" + + int mi; +#line 67 "tmp.rl" +}; + +#define l__error 19 +#define l_tt_id 0 +#define l_tt_equals 1 +#define l_tt_semi 2 +#define l_tt_pipe 3 +#define l_tt_amp 4 +#define l_tt_minus 5 +#define l_tt_dot 6 +#define l_tt_colon 7 +#define l_tt_percent 8 +#define l_tt_dollar 9 +#define l_tt_plus 10 +#define l_tt_number 11 +#define l_tt_star 12 +#define l_tt_question 13 +#define l_tt_not 14 +#define l_tt_andFSM 15 +#define l_tt_orFSM 16 +#define l_tt_open 17 +#define l_tt_close 18 +#define l_start 23 +#define l_M 24 +#define l_A 25 +#define l_E 26 +#define l_T 27 +#define l_N 28 +#define l_K 29 +#define l_F 30 +#define l__start 31 +#define l__eof 20 + +struct LangEl; + +struct Parser +{ + Parser(); + + void parseLangEl( LangEl *langEl ); + int done( ); + + void push( LangEl *lel ) { + lel->prev = stack; + stack = lel; + } + LangEl *pop() { + LangEl *ret = stack; + stack = stack->prev; + return ret; + } + int pop( int n ); + void rem( LangEl *lel, int n ); + LangEl *stack; + int next; + LangEl *redLel; + LangEl *rhs[10]; + + int cs; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + int init( ); + + // Execute the machine on a block of data. 
Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + int execute( LangEl *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + + +%%{ + machine Parser; + + getkey fpc->type; + + action shift { + fpc->state = fcurs; + push( fpc ); + } + + action pop1 { fnext *pop(1); } + action pop2 { fnext *pop(2); } + action pop3 { fnext *pop(3); } + action pop4 { fnext *pop(4); } + + action new_error { + redLel = new LangEl(); + redLel->type = 19; + } + + action newstart { + redLel = new Lel_start(); + redLel->type = 23; + } + + action newM { + redLel = new Lel_M(); + redLel->type = 24; + } + + action newA { + redLel = new LangEl(); + redLel->type = 25; + } + + action newE { + redLel = new LangEl(); + redLel->type = 26; + } + + action newT { + redLel = new LangEl(); + redLel->type = 27; + } + + action newN { + redLel = new LangEl(); + redLel->type = 28; + } + + action newK { + redLel = new LangEl(); + redLel->type = 29; + } + + action newF { + redLel = new LangEl(); + redLel->type = 30; + } + + action new_eof { + redLel = new LangEl(); + redLel->type = 20; + } + + action new_epsilon { + redLel = new LangEl(); + redLel->type = 21; + } + + action new_null { + redLel = new LangEl(); + redLel->type = 22; + } + + action rem1 { rem(fpc, 1); } + action rem2 { rem(fpc, 2); } + action rem3 { rem(fpc, 3); } + action rem4 { rem(fpc, 4); } + + action r_start_0 + { +#line 41 "tmp.gmr" + + cout << "start = M;" << endl; + static_cast<Lel_start*>(redLel)->si = static_cast<Lel_M*>(rhs[0])->mi; + +#line 214 "tmp.rl" + } + + action r_M_0 + { +#line 44 "tmp.gmr" + cout << "M = M A;" << endl; +#line 221 "tmp.rl" + } + + 
action r_M_1 + { +#line 45 "tmp.gmr" + cout << "M = A;" << endl; +#line 228 "tmp.rl" + } + + action r_A_0 + { +#line 46 "tmp.gmr" + cout << "A = tt_id tt_equals E tt_semi;" << endl; +#line 235 "tmp.rl" + } + + action r_E_0 + { +#line 47 "tmp.gmr" + cout << "E = E tt_pipe T;" << endl; +#line 242 "tmp.rl" + } + + action r_E_1 + { +#line 48 "tmp.gmr" + cout << "E = E tt_amp T;" << endl; +#line 249 "tmp.rl" + } + + action r_E_2 + { +#line 49 "tmp.gmr" + cout << "E = E tt_minus T;" << endl; +#line 256 "tmp.rl" + } + + action r_E_3 + { +#line 50 "tmp.gmr" + cout << "E = T;" << endl; +#line 263 "tmp.rl" + } + + action r_T_0 + { +#line 51 "tmp.gmr" + cout << "T = T tt_dot N;" << endl; +#line 270 "tmp.rl" + } + + action r_T_1 + { +#line 52 "tmp.gmr" + cout << "T = T N;" << endl; +#line 277 "tmp.rl" + } + + action r_T_2 + { +#line 53 "tmp.gmr" + cout << "T = N;" << endl; +#line 284 "tmp.rl" + } + + action r_N_0 + { +#line 54 "tmp.gmr" + cout << "N = N tt_colon tt_id;" << endl; +#line 291 "tmp.rl" + } + + action r_N_1 + { +#line 55 "tmp.gmr" + cout << "N = N tt_percent tt_id;" << endl; +#line 298 "tmp.rl" + } + + action r_N_2 + { +#line 56 "tmp.gmr" + cout << "N = N tt_dollar tt_id;" << endl; +#line 305 "tmp.rl" + } + + action r_N_3 + { +#line 57 "tmp.gmr" + cout << "N = N tt_colon tt_plus tt_number;" << endl; +#line 312 "tmp.rl" + } + + action r_N_4 + { +#line 58 "tmp.gmr" + cout << "N = N tt_colon tt_minus tt_number;" << endl; +#line 319 "tmp.rl" + } + + action r_N_5 + { +#line 59 "tmp.gmr" + cout << "N = N tt_percent tt_plus tt_number;" << endl; +#line 326 "tmp.rl" + } + + action r_N_6 + { +#line 60 "tmp.gmr" + cout << "N = N tt_percent tt_minus tt_number;" << endl; +#line 333 "tmp.rl" + } + + action r_N_7 + { +#line 61 "tmp.gmr" + cout << "N = N tt_dollar tt_plus tt_number;" << endl; +#line 340 "tmp.rl" + } + + action r_N_8 + { +#line 62 "tmp.gmr" + cout << "N = N tt_dollar tt_minus tt_number;" << endl; +#line 347 "tmp.rl" + } + + action r_N_9 + { +#line 63 "tmp.gmr" + 
cout << "N = K;" << endl; +#line 354 "tmp.rl" + } + + action r_K_0 + { +#line 64 "tmp.gmr" + cout << "K = F tt_star;" << endl; +#line 361 "tmp.rl" + } + + action r_K_1 + { +#line 65 "tmp.gmr" + cout << "K = F tt_question;" << endl; +#line 368 "tmp.rl" + } + + action r_K_2 + { +#line 66 "tmp.gmr" + cout << "K = F tt_plus;" << endl; +#line 375 "tmp.rl" + } + + action r_K_3 + { +#line 67 "tmp.gmr" + cout << "K = F;" << endl; +#line 382 "tmp.rl" + } + + action r_K_4 + { +#line 68 "tmp.gmr" + cout << "K = tt_not F tt_star;" << endl; +#line 389 "tmp.rl" + } + + action r_K_5 + { +#line 69 "tmp.gmr" + cout << "K = tt_not F tt_question;" << endl; +#line 396 "tmp.rl" + } + + action r_K_6 + { +#line 70 "tmp.gmr" + cout << "K = tt_not F tt_plus;" << endl; +#line 403 "tmp.rl" + } + + action r_K_7 + { +#line 71 "tmp.gmr" + cout << "K = tt_not F;" << endl; +#line 410 "tmp.rl" + } + + action r_F_0 + { +#line 72 "tmp.gmr" + cout << "F = tt_andFSM;" << endl; +#line 417 "tmp.rl" + } + + action r_F_1 + { +#line 73 "tmp.gmr" + cout << "F = tt_orFSM;" << endl; +#line 424 "tmp.rl" + } + + action r_F_2 + { +#line 74 "tmp.gmr" + cout << "F = tt_id;" << endl; +#line 431 "tmp.rl" + } + + action r_F_3 + { +#line 75 "tmp.gmr" + cout << "F = tt_open E tt_close;" << endl; +#line 438 "tmp.rl" + } + + main := + s0: start: ( + 23 @shift -> s1 | + 25 @shift -> s3 | + 24 @shift -> s4 | + 0 @shift -> s5 + ), + s1: ( + 20 @shift -> s54 + ), + s2: ( + (0|20) @pop2 @newM @r_M_0 @rem2 -> s54 + ), + s3: ( + (0|20) @pop1 @newM @r_M_1 @rem1 -> s54 + ), + s4: ( + 20 @pop1 @newstart @r_start_0 @rem1 -> s54 | + 25 @shift -> s2 | + 0 @shift -> s5 + ), + s5: ( + 1 @shift -> s6 + ), + s6: ( + 26 @shift -> s8 | + 27 @shift -> s9 | + 29 @shift -> s25 | + 28 @shift -> s26 | + 30 @shift -> s33 | + 17 @shift -> s35 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s7: ( + (0|20) @pop4 @newA @r_A_0 @rem4 -> s54 + ), + s8: ( + 2 @shift -> s7 | + 3 @shift -> s37 | + 4 @shift -> s38 
| + 5 @shift -> s39 + ), + s9: ( + (2..5|18) @pop1 @newE @r_E_3 @rem1 -> s54 | + 29 @shift -> s25 | + 30 @shift -> s33 | + 28 @shift -> s34 | + 17 @shift -> s35 | + 6 @shift -> s41 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s10: ( + (0|2..9|14..18) @pop3 @newN @r_N_0 @rem3 -> s54 + ), + s11: ( + (0|2..9|14..18) @pop3 @newN @r_N_1 @rem3 -> s54 + ), + s12: ( + (0|2..9|14..18) @pop3 @newN @r_N_2 @rem3 -> s54 + ), + s13: ( + 11 @shift -> s14 + ), + s14: ( + (0|2..9|14..18) @pop4 @newN @r_N_3 @rem4 -> s54 + ), + s15: ( + 11 @shift -> s16 + ), + s16: ( + (0|2..9|14..18) @pop4 @newN @r_N_4 @rem4 -> s54 + ), + s17: ( + 11 @shift -> s18 + ), + s18: ( + (0|2..9|14..18) @pop4 @newN @r_N_5 @rem4 -> s54 + ), + s19: ( + 11 @shift -> s20 + ), + s20: ( + (0|2..9|14..18) @pop4 @newN @r_N_6 @rem4 -> s54 + ), + s21: ( + 11 @shift -> s22 + ), + s22: ( + (0|2..9|14..18) @pop4 @newN @r_N_7 @rem4 -> s54 + ), + s23: ( + 11 @shift -> s24 + ), + s24: ( + (0|2..9|14..18) @pop4 @newN @r_N_8 @rem4 -> s54 + ), + s25: ( + (0|2..9|14..18) @pop1 @newN @r_N_9 @rem1 -> s54 + ), + s26: ( + (0|2..6|14..18) @pop1 @newT @r_T_2 @rem1 -> s54 | + 7 @shift -> s27 | + 8 @shift -> s28 | + 9 @shift -> s29 + ), + s27: ( + 0 @shift -> s10 | + 10 @shift -> s13 | + 5 @shift -> s15 + ), + s28: ( + 0 @shift -> s11 | + 10 @shift -> s17 | + 5 @shift -> s19 + ), + s29: ( + 0 @shift -> s12 | + 10 @shift -> s21 | + 5 @shift -> s23 + ), + s30: ( + (0|2..9|14..18) @pop2 @newK @r_K_0 @rem2 -> s54 + ), + s31: ( + (0|2..9|14..18) @pop2 @newK @r_K_1 @rem2 -> s54 + ), + s32: ( + (0|2..9|14..18) @pop2 @newK @r_K_2 @rem2 -> s54 + ), + s33: ( + (0|2..9|14..18) @pop1 @newK @r_K_3 @rem1 -> s54 | + 12 @shift -> s30 | + 13 @shift -> s31 | + 10 @shift -> s32 + ), + s34: ( + (0|2..6|14..18) @pop2 @newT @r_T_1 @rem2 -> s54 | + 7 @shift -> s27 | + 8 @shift -> s28 | + 9 @shift -> s29 + ), + s35: ( + 27 @shift -> s9 | + 29 @shift -> s25 | + 28 @shift -> s26 | + 30 @shift -> s33 | + 17 @shift -> 
s35 | + 26 @shift -> s40 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s36: ( + (0|2..10|12..18) @pop3 @newF @r_F_3 @rem3 -> s54 + ), + s37: ( + 29 @shift -> s25 | + 28 @shift -> s26 | + 30 @shift -> s33 | + 17 @shift -> s35 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 | + 27 @shift -> s53 + ), + s38: ( + 29 @shift -> s25 | + 28 @shift -> s26 | + 30 @shift -> s33 | + 17 @shift -> s35 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 | + 27 @shift -> s52 + ), + s39: ( + 29 @shift -> s25 | + 28 @shift -> s26 | + 30 @shift -> s33 | + 17 @shift -> s35 | + 27 @shift -> s42 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s40: ( + 18 @shift -> s36 | + 3 @shift -> s37 | + 4 @shift -> s38 | + 5 @shift -> s39 + ), + s41: ( + 29 @shift -> s25 | + 30 @shift -> s33 | + 17 @shift -> s35 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 | + 28 @shift -> s51 + ), + s42: ( + (2..5|18) @pop3 @newE @r_E_2 @rem3 -> s54 | + 29 @shift -> s25 | + 30 @shift -> s33 | + 28 @shift -> s34 | + 17 @shift -> s35 | + 6 @shift -> s41 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s43: ( + (0|2..9|14..18) @pop3 @newK @r_K_4 @rem3 -> s54 + ), + s44: ( + (0|2..9|14..18) @pop3 @newK @r_K_5 @rem3 -> s54 + ), + s45: ( + (0|2..9|14..18) @pop3 @newK @r_K_6 @rem3 -> s54 + ), + s46: ( + 17 @shift -> s35 | + 30 @shift -> s47 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s47: ( + (0|2..9|14..18) @pop2 @newK @r_K_7 @rem2 -> s54 | + 12 @shift -> s43 | + 13 @shift -> s44 | + 10 @shift -> s45 + ), + s48: ( + (0|2..10|12..18) @pop1 @newF @r_F_0 @rem1 -> s54 + ), + s49: ( + (0|2..10|12..18) @pop1 @newF @r_F_1 @rem1 -> s54 + ), + s50: ( + (0|2..10|12..18) @pop1 @newF @r_F_2 @rem1 -> s54 + ), + s51: ( + (0|2..6|14..18) @pop3 @newT @r_T_0 @rem3 -> s54 | + 7 @shift -> 
s27 | + 8 @shift -> s28 | + 9 @shift -> s29 + ), + s52: ( + (2..5|18) @pop3 @newE @r_E_1 @rem3 -> s54 | + 29 @shift -> s25 | + 30 @shift -> s33 | + 28 @shift -> s34 | + 17 @shift -> s35 | + 6 @shift -> s41 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s53: ( + (2..5|18) @pop3 @newE @r_E_0 @rem3 -> s54 | + 29 @shift -> s25 | + 30 @shift -> s33 | + 28 @shift -> s34 | + 17 @shift -> s35 | + 6 @shift -> s41 | + 14 @shift -> s46 | + 15 @shift -> s48 | + 16 @shift -> s49 | + 0 @shift -> s50 + ), + s54: ( + '' -> final + ) + ; +}%% + +%% write data; + +Parser::Parser( ) +{ } + +int Parser::init( ) +{ + %% write init; + return 0; +} + +int Parser::execute( LangEl *_data, int _len ) +{ + LangEl *p = _data; + LangEl *pe = _data+_len; + %% write exec; + if ( cs == Parser_error ) + return -1; + if ( cs >= Parser_first_final ) + return 1; + return 0; +} + +int Parser::finish( ) +{ + if ( cs == Parser_error ) + return -1; + if ( cs >= Parser_first_final ) + return 1; + return 0; +} + +void Parser::parseLangEl( LangEl *lel ) +{ + redLel = 0; + execute( lel, 1 ); + while ( redLel != 0 ) { + execute( redLel, 1 ); + redLel = 0; + execute( lel, 1 ); + } +} + +int Parser::pop( int n ) +{ + for ( int i = n-1; i >= 0; i-- ) + rhs[i] = pop(); + return rhs[0]->state; +} + +void Parser::rem( LangEl *lel, int n ) +{ + for ( int i = n-1; i >= 0; i-- ) + delete rhs[i]; +} + +int Parser::done( ) +{ + Token *eof = new Token; + eof->type = l__eof; + eof->line = 0; + eof->pos = 0; + parseLangEl( eof ); + return finish(); +} + +#line 77 "tmp.gmr" + + +#include <assert.h> +#define MAX_TOKS 10000 + +struct TokList +{ + TokList() : numToks(0) { } + + void append( int type ); + int parse(); + + Token *toks[MAX_TOKS]; + int numToks; +}; + +void TokList::append( int type ) +{ + assert( numToks < MAX_TOKS ); + toks[numToks] = new Token; + toks[numToks]->type = type; + numToks += 1; +} + +int TokList::parse() +{ + Parser parser; + parser.init(); + for ( int i = 
0; i < numToks; i++ ) + parser.parseLangEl( toks[i] ); + return parser.done(); +} + +void test0() +{ + TokList tokList; + tokList.append( tt_id ); + tokList.append( tt_equals ); + tokList.append( tt_id ); + tokList.append( tt_star ); + tokList.append( tt_minus ); + tokList.append( tt_andFSM ); + tokList.append( tt_dot ); + tokList.append( tt_id ); + tokList.append( tt_semi ); + tokList.append( tt_id ); + tokList.append( tt_equals ); + tokList.append( tt_id ); + tokList.append( tt_andFSM ); + tokList.append( tt_id ); + tokList.append( tt_semi ); + cout << tokList.parse() << endl; +} + +void test1() +{ + TokList tokList; + tokList.append( tt_id ); + tokList.append( tt_equals ); + tokList.append( tt_open ); + tokList.append( tt_orFSM ); + tokList.append( tt_minus ); + tokList.append( tt_andFSM ); + tokList.append( tt_close ); + tokList.append( tt_star ); + tokList.append( tt_semi ); + cout << tokList.parse() << endl; +} +void test2() +{ + TokList tokList; + tokList.append( tt_id ); + tokList.append( tt_equals ); + tokList.append( tt_not ); + tokList.append( tt_open ); + tokList.append( tt_orFSM ); + tokList.append( tt_minus ); + tokList.append( tt_not ); + tokList.append( tt_andFSM ); + tokList.append( tt_close ); + tokList.append( tt_star ); + tokList.append( tt_semi ); + cout << tokList.parse() << endl; +} +void test3() +{ + TokList tokList; + tokList.append( tt_id ); + tokList.append( tt_equals ); + tokList.append( tt_id ); + tokList.append( tt_colon ); + tokList.append( tt_minus ); + tokList.append( tt_number ); + tokList.append( tt_id ); + tokList.append( tt_colon ); + tokList.append( tt_id ); + tokList.append( tt_id ); + tokList.append( tt_dollar ); + tokList.append( tt_plus ); + tokList.append( tt_number ); + tokList.append( tt_id ); + tokList.append( tt_percent ); + tokList.append( tt_minus ); + tokList.append( tt_number ); + tokList.append( tt_semi ); + cout << tokList.parse() << endl; +} +void test4() +{ + TokList tokList; + tokList.append( tt_id ); + 
tokList.append( tt_equals ); + tokList.append( tt_id ); + tokList.append( tt_pipe ); + tokList.append( tt_id ); + tokList.append( tt_amp ); + tokList.append( tt_id ); + tokList.append( tt_minus ); + tokList.append( tt_id ); + tokList.append( tt_semi ); + cout << tokList.parse() << endl; +} + +int main() +{ + test0(); + test1(); + test2(); + test3(); + test4(); +} + +#ifdef _____OUTPUT_____ +F = tt_id; +K = F tt_star; +N = K; +T = N; +E = T; +F = tt_andFSM; +K = F; +N = K; +T = N; +F = tt_id; +K = F; +N = K; +T = T tt_dot N; +E = E tt_minus T; +A = tt_id tt_equals E tt_semi; +M = A; +F = tt_id; +K = F; +N = K; +T = N; +F = tt_andFSM; +K = F; +N = K; +T = T N; +F = tt_id; +K = F; +N = K; +T = T N; +E = T; +A = tt_id tt_equals E tt_semi; +M = M A; +start = M; +1 +F = tt_orFSM; +K = F; +N = K; +T = N; +E = T; +F = tt_andFSM; +K = F; +N = K; +T = N; +E = E tt_minus T; +F = tt_open E tt_close; +K = F tt_star; +N = K; +T = N; +E = T; +A = tt_id tt_equals E tt_semi; +M = A; +start = M; +1 +F = tt_orFSM; +K = F; +N = K; +T = N; +E = T; +F = tt_andFSM; +K = tt_not F; +N = K; +T = N; +E = E tt_minus T; +F = tt_open E tt_close; +K = tt_not F tt_star; +N = K; +T = N; +E = T; +A = tt_id tt_equals E tt_semi; +M = A; +start = M; +1 +F = tt_id; +K = F; +N = K; +N = N tt_colon tt_minus tt_number; +T = N; +F = tt_id; +K = F; +N = K; +N = N tt_colon tt_id; +T = T N; +F = tt_id; +K = F; +N = K; +N = N tt_dollar tt_plus tt_number; +T = T N; +F = tt_id; +K = F; +N = K; +N = N tt_percent tt_minus tt_number; +T = T N; +E = T; +A = tt_id tt_equals E tt_semi; +M = A; +start = M; +1 +F = tt_id; +K = F; +N = K; +T = N; +E = T; +F = tt_id; +K = F; +N = K; +T = N; +E = E tt_pipe T; +F = tt_id; +K = F; +N = K; +T = N; +E = E tt_amp T; +F = tt_id; +K = F; +N = K; +T = N; +E = E tt_minus T; +A = tt_id tt_equals E tt_semi; +M = A; +start = M; +1 +#endif diff --git a/test/langtrans_c.sh b/test/langtrans_c.sh new file mode 100755 index 0000000..2523b0a --- /dev/null +++ b/test/langtrans_c.sh @@ -0,0 
+1,111 @@ +#!/bin/bash +# + +file=$1 + +[ -f $file ] || exit 1 + +# Get the machine name. +machine=`sed -n 's/^[\t ]*machine[\t ]*\([a-zA-Z_0-9]*\)[\t ]*;[\t ]*$/\1/p' $file` + +# Make a temporary version of the test case using the C language translations. +sed -n '/\/\*/,/\*\//d;p' $file | txl -q stdin langtrans_c.txl > $file.pr + +needs_eof=`sed '/@NEEDS_EOF/s/^.*$/yes/p;d' $file` +if [ "$needs_eof" != 'yes' ]; then + needs_eof=`sed -n '/\/\*/,/\*\//d;p' $file | txl -q stdin checkeofact.txl` +fi + +# Begin writing out the test case. +cat << EOF +/* + * @LANG: c + * @GENERATED: yes +EOF + +grep '@ALLOW_GENFLAGS:' $file +grep '@ALLOW_MINFLAGS:' $file + +cat << EOF + */ +#include <string.h> +#include <stdio.h> +EOF + +# Write the data declarations +sed -n '/^%%$/q;p' $file.pr + +# Write out the machine specification. +sed -n '/^%%{$/,/^}%%/p' $file.pr + +# Write out the init and execute routines. +cat << EOF +int cs; +%% write data; +void init() +{ +EOF + +sed -n '0,/^%%$/d; /^%%{$/q; {s/^/\t/;p}' $file.pr + +cat << EOF + %% write init; +} + +void exec( char *data, int len ) +{ + char *p = data; + char *pe = data + len; +EOF + +[ "$needs_eof" = "yes" ] && echo "char *eof = pe;" + +cat << EOF + %% write exec; +} + +void finish( ) +{ + if ( cs >= ${machine}_first_final ) + printf( "ACCEPT\\n" ); + else + printf( "FAIL\\n" ); +} +EOF + +# Write out the test data. +sed -n '0,/\/\* _____INPUT_____/d; /_____INPUT_____ \*\//q; p;' $file | awk ' +BEGIN { + print "char *inp[] = {" +} +{ + print " " $0 "," +} +END { + print "};" + print "" + print "int inplen = " NR ";" +}' + +# Write out the main routine. +cat << EOF + +int main( ) +{ + int i; + for ( i = 0; i < inplen; i++ ) { + init(); + exec( inp[i], strlen(inp[i]) ); + finish(); + } + return 0; +} +#ifdef _____OUTPUT_____ +EOF + +# Write out the expected output. +sed -n '0,/\/\* _____OUTPUT_____/d; /_____OUTPUT_____ \*\//q; p;' $file +echo "#endif" + +# Don't need this language-specific file anymore. 
+rm $file.pr diff --git a/test/langtrans_c.txl b/test/langtrans_c.txl new file mode 100644 index 0000000..5a4971f --- /dev/null +++ b/test/langtrans_c.txl @@ -0,0 +1,335 @@ +include "testcase.txl" + +define c_statements + [repeat c_lang_stmt] +end define + +define c_lang_stmt + [al_ragel_stmt] + | [c_variable_decl] + | [c_expr_stmt] + | [c_if_stmt] + | [EX] '{ [IN] [NL] [c_statements] [EX] '} [IN] [NL] +end define + +define c_variable_decl + [c_type_decl] [id] [opt union] '; [NL] +end define + +define c_type_decl + [al_type_decl] + | 'char '* +end define + +define c_expr_stmt + [c_expr] '; [NL] +end define + +define c_expr + [c_term] [repeat c_expr_extend] +end define + +define c_expr_extend + [al_expr_op] [c_term] +end define + +define c_term + [al_term] + | [id] '( [c_args] ') +end define + +define c_args + [list c_expr] +end define + +define c_sign + '- | '+ +end define + +define c_if_stmt + 'if '( [c_expr] ') [NL] [IN] + [c_lang_stmt] [EX] + [opt c_else] +end define + +define c_else + 'else [NL] [IN] + [c_lang_stmt] [EX] +end define + +define c_lang + [c_statements] + '%% [NL] + [c_statements] + [ragel_def] +end define + +define program + [lang_indep] + | [c_lang] +end define + +redefine al_host_block + '{ [NL] [IN] [al_statements] [EX] '} [NL] + | '{ [NL] [IN] [c_statements] [EX] '} [NL] +end define + +redefine cond_action_stmt + 'action [id] '{ [al_expr] '} [NL] + | 'action [id] '{ [c_expr] '} [NL] +end redefine + + +rule boolTypes + replace [al_type_decl] + 'bool + by + 'int +end rule + +rule ptrTypes + replace [c_type_decl] + 'ptr + by + 'char '* +end rule + +rule boolVals1 + replace [al_term] + 'true + by + '1 +end rule + +rule boolVals2 + replace [al_term] + 'false + by + '0 +end rule + +function alStmtToC1 AlStmt [action_lang_stmt] + deconstruct AlStmt + VarDecl [al_variable_decl] + deconstruct VarDecl + Type [al_type_decl] Id [id] OptUnion [opt union]'; + construct CType [c_type_decl] + Type + construct Result [c_variable_decl] + CType [boolTypes] 
[ptrTypes] Id OptUnion '; + replace [repeat c_lang_stmt] + by + Result +end function + +rule alTermToC1 + replace [al_term] + 'first_token_char + by + 'ts '[0] +end rule + +rule alTermToC2 + replace [al_term] + '< _ [al_type_decl] '> '( AlExpr [al_expr] ') + by + '( AlExpr ') +end rule + +function alTermToC + replace [al_term] + AlTerm [al_term] + by + AlTerm + [alTermToC1] + [alTermToC2] +end function + +function alExprExtendToC AlExprExtend [repeat al_expr_extend] + deconstruct AlExprExtend + Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend] + construct RestC [repeat c_expr_extend] + _ [alExprExtendToC Rest] + replace [repeat c_expr_extend] + by + Op Term [alTermToC] RestC +end function + +function alExprToC AlExpr [al_expr] + deconstruct AlExpr + ALTerm [al_term] AlExprExtend [repeat al_expr_extend] + construct CExprExtend [repeat c_expr_extend] + _ [alExprExtendToC AlExprExtend] + construct Result [opt c_expr] + ALTerm [alTermToC] CExprExtend + replace [opt c_expr] + by + Result [boolVals1] [boolVals2] +end function + +function alStmtToC2 AlStmt [action_lang_stmt] + deconstruct AlStmt + AlExpr [al_expr] '; + construct OptCExpr [opt c_expr] + _ [alExprToC AlExpr] + deconstruct OptCExpr + CExpr [c_expr] + replace [repeat c_lang_stmt] + by + CExpr '; +end function + +function alOptElseC AlOptElse [opt al_else] + deconstruct AlOptElse + 'else + AlSubStmt [action_lang_stmt] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct CSubStmts [repeat c_lang_stmt] + _ [alToC AlSubStmts] + deconstruct CSubStmts + CSubStmt [c_lang_stmt] + replace [opt c_else] + by + 'else + CSubStmt +end function + +function alStmtToC3 AlStmt [action_lang_stmt] + deconstruct AlStmt + 'if '( AlExpr [al_expr] ') + AlSubStmt [action_lang_stmt] + AlOptElse [opt al_else] + construct OptCExpr [opt c_expr] + _ [alExprToC AlExpr] + deconstruct OptCExpr + CExpr [c_expr] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct CSubStmts [repeat c_lang_stmt] 
+ _ [alToC AlSubStmts] + deconstruct CSubStmts + CSubStmt [c_lang_stmt] + construct OptCElse [opt c_else] + _ [alOptElseC AlOptElse] + replace [repeat c_lang_stmt] + by + 'if '( CExpr ') + CSubStmt + OptCElse +end function + +function alStmtToC4a AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printi Id [id] '; + replace [repeat c_lang_stmt] + by + 'printf '( '"%i" ', Id '); +end function + +function alStmtToC4b AlStmt [action_lang_stmt] + deconstruct AlStmt + 'prints String [stringlit] '; + replace [repeat c_lang_stmt] + by + 'fputs '( String , 'stdout '); +end function + +function alStmtToC4c AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printb Id [id] '; + replace [repeat c_lang_stmt] + by + 'fwrite '( Id ', '1 ', 'pos ', 'stdout '); +end function + +function alStmtToC4d AlStmt [action_lang_stmt] + deconstruct AlStmt + 'print_token '; + replace [repeat c_lang_stmt] + by + 'fwrite '( 'ts ', '1 ', 'te '- 'ts ', 'stdout '); +end function + +function alStmtToC5 AlStmt [action_lang_stmt] + deconstruct AlStmt + '{ AlSubStmts [repeat action_lang_stmt] '} + construct CSubStmts [repeat c_lang_stmt] + _ [alToC AlSubStmts] + replace [repeat c_lang_stmt] + by + '{ CSubStmts '} +end function + +function alStmtToC6 AlStmt [action_lang_stmt] + deconstruct AlStmt + RagelStmt [al_ragel_stmt] + replace [repeat c_lang_stmt] + by + RagelStmt +end function + +function alToC AlStmts [repeat action_lang_stmt] + deconstruct AlStmts + FirstStmt [action_lang_stmt] Rest [repeat action_lang_stmt] + construct FirstC [repeat c_lang_stmt] + _ + [alStmtToC1 FirstStmt] + [alStmtToC2 FirstStmt] + [alStmtToC3 FirstStmt] + [alStmtToC4a FirstStmt] + [alStmtToC4b FirstStmt] + [alStmtToC4c FirstStmt] + [alStmtToC4d FirstStmt] + [alStmtToC5 FirstStmt] + [alStmtToC6 FirstStmt] + construct RestC [repeat c_lang_stmt] + _ [alToC Rest] + replace [repeat c_lang_stmt] + by + FirstC [. 
RestC] +end function + +rule actionTransC + replace [al_host_block] + '{ AlStmts [repeat action_lang_stmt] '} + construct CStmts [repeat c_lang_stmt] + _ [alToC AlStmts] + by + '{ CStmts '} +end rule + +rule condTransC + replace [cond_action_stmt] + 'action Id [id] '{ AlExpr [al_expr] '} + construct OptCExpr [opt c_expr] + _ [alExprToC AlExpr] + deconstruct OptCExpr + CExpr [c_expr] + by + 'action Id '{ CExpr '} +end rule + +function langTransC + replace [program] + Definitions [repeat action_lang_stmt] + '%% + Initializations [repeat action_lang_stmt] + RagelDef [ragel_def] + construct CDefinitions [repeat c_lang_stmt] + _ [alToC Definitions] + construct CInitializations [repeat c_lang_stmt] + _ [alToC Initializations] + by + CDefinitions + '%% + CInitializations + RagelDef [actionTransC] [condTransC] +end function + +function main + replace [program] + P [program] + by + P [langTransC] +end function diff --git a/test/langtrans_csharp.sh b/test/langtrans_csharp.sh new file mode 100755 index 0000000..3980f48 --- /dev/null +++ b/test/langtrans_csharp.sh @@ -0,0 +1,109 @@ +#!/bin/bash +# + +file=$1 + +[ -f $file ] || exit 1 +root=${file%.rl} +class=${root}_csharp + +# Make a temporary version of the test case using the C# language translations. +sed -n '/\/\*/,/\*\//d;p' $file | txl -q stdin langtrans_csharp.txl - $class > $file.pr + +# Begin writing out the test case. +cat << EOF +/* + * @LANG: csharp + * @GENERATED: yes +EOF + +grep '@ALLOW_GENFLAGS:' $file | sed 's/-G2//g' +grep '@ALLOW_MINFLAGS:' $file + +cat << EOF + */ +using System; +// Disables lots of warnings that appear in the test suite +#pragma warning disable 0168, 0169, 0219, 0162, 0414 +namespace Test { +class $class +{ +EOF + +# Write the data declarations +sed -n '/^%%$/q;{s/^/\t/;p}' $file.pr + +# Write out the machine specification. +sed -n '/^%%{$/,/^}%%/{s/^/\t/;p}' $file.pr + +# Write out the init and execute routines. 
+cat << EOF + + int cs; + %% write data; + + void init() + { +EOF + +sed -n '0,/^%%$/d; /^%%{$/q; {s/^/\t\t/;p}' $file.pr + +cat << EOF + %% write init; + } + + void exec( char[] data, int len ) + { + int p = 0; + int pe = len; + int eof = len; + string _s; + %% write exec; + } + + void finish( ) + { + if ( cs >= ${class}_first_final ) + Console.WriteLine( "ACCEPT" ); + else + Console.WriteLine( "FAIL" ); + } + +EOF + +# Write out the test data. +sed -n '0,/\/\* _____INPUT_____/d; /_____INPUT_____ \*\//q; p;' $file | awk ' +BEGIN { + print " static readonly string[] inp = {" +} +{ + print " " $0 "," +} +END { + print " };" + print "" + print " static readonly int inplen = " NR ";" +}' + + +# Write out the main routine. +cat << EOF + + public static void Main (string[] args) + { + $class machine = new $class(); + for ( int i = 0; i < inplen; i++ ) { + machine.init(); + machine.exec( inp[i].ToCharArray(), inp[i].Length ); + machine.finish(); + } + } +} +} +EOF + +# Write out the expected output. +sed -n '/\/\* _____OUTPUT_____/,/_____OUTPUT_____ \*\//p;' $file + +# Don't need this language-specific file anymore. 
+rm $file.pr diff --git a/test/langtrans_csharp.txl b/test/langtrans_csharp.txl new file mode 100644 index 0000000..a8c6003 --- /dev/null +++ b/test/langtrans_csharp.txl @@ -0,0 +1,358 @@ +include "testcase.txl" + +keys + 'bool 'new +end keys + + +define csharp_statements + [repeat csharp_lang_stmt] +end define + +define csharp_lang_stmt + [al_ragel_stmt] + | [csharp_variable_decl] + | [csharp_expr_stmt] + | [csharp_if_stmt] + | [EX] '{ [IN] [NL] [csharp_statements] [EX] '} [IN] [NL] +end define + +define csharp_variable_decl + [csharp_type_decl] [opt union] [id] '; [NL] +end define + +define csharp_type_decl + [al_type_decl] + | 'bool + | 'String +end define + +define csharp_expr_stmt + [csharp_expr] '; [NL] +end define + +define csharp_expr + [csharp_term] [repeat csharp_expr_extend] +end define + +define csharp_expr_extend + [al_expr_op] [csharp_term] +end define + +define csharp_term + [al_term] + | [id] [repeat csharp_dot_id] + | [id] [repeat csharp_dot_id] '( [csharp_args] ') + | 'new [csharp_type_decl] [union] + | 'new [csharp_type_decl] '( [csharp_args] ') +end define + +define csharp_dot_id + '. 
[id] +end define + +define csharp_args + [list csharp_expr] +end define + +define csharp_sign + '- | '+ +end define + +define csharp_if_stmt + 'if '( [csharp_expr] ') [NL] [IN] + [csharp_lang_stmt] [EX] + [opt csharp_else] +end define + +define csharp_else + 'else [NL] [IN] + [csharp_lang_stmt] [EX] +end define + +define csharp_lang + [csharp_statements] + '%% [NL] + [csharp_statements] + [ragel_def] +end define + +define program + [lang_indep] + | [csharp_lang] +end define + +redefine al_host_block + '{ [NL] [IN] [al_statements] [EX] '} [NL] + | '{ [NL] [IN] [csharp_statements] [EX] '} [NL] +end define + +redefine cond_action_stmt + 'action [id] '{ [al_expr] '} [NL] + | 'action [id] '{ [csharp_expr] '} [NL] +end redefine + + +function clearUnion Type [csharp_type_decl] Id [id] + replace [opt union] + Union [union] + import ArrayInits [csharp_statements] + Stmts [repeat csharp_lang_stmt] + export ArrayInits + Id '= 'new Type Union '; Stmts + by + '[] +end function + +rule ptrTypes + replace [al_type_decl] + 'ptr + by + 'int +end rule + +function alStmtToCSharp1 AlStmt [action_lang_stmt] + deconstruct AlStmt + VarDecl [al_variable_decl] + deconstruct VarDecl + Type [al_type_decl] Id [id] OptUnion [opt union] '; + construct CSharpType [csharp_type_decl] + Type + construct Result [csharp_variable_decl] + CSharpType [ptrTypes] OptUnion [clearUnion CSharpType Id] Id '; + replace [repeat csharp_lang_stmt] + by + Result +end function + +rule alTermToCSharp1 + replace [al_term] + 'first_token_char + by + 'data '[ts] +end rule + +rule alTermToCSharp2 + replace [al_term] + '< _ [al_type_decl] '> '( AlExpr [al_expr] ') + by + '( AlExpr ') +end rule + +function alTermToCSharp + replace [al_term] + AlTerm [al_term] + by + AlTerm + [alTermToCSharp1] + [alTermToCSharp2] +end function + +function alExprExtendToCSharp AlExprExtend [repeat al_expr_extend] + deconstruct AlExprExtend + Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend] + construct CSharpRest [repeat 
csharp_expr_extend] + _ [alExprExtendToCSharp Rest] + replace [repeat csharp_expr_extend] + by + Op Term [alTermToCSharp] CSharpRest +end function + +function alExprToCSharp AlExpr [al_expr] + deconstruct AlExpr + ALTerm [al_term] AlExprExtend [repeat al_expr_extend] + construct CSharpExprExtend [repeat csharp_expr_extend] + _ [alExprExtendToCSharp AlExprExtend] + construct Result [opt csharp_expr] + ALTerm [alTermToCSharp] CSharpExprExtend + replace [opt csharp_expr] + by + Result +end function + +function alStmtToCSharp2 AlStmt [action_lang_stmt] + deconstruct AlStmt + AlExpr [al_expr] '; + construct OptCSharpExpr [opt csharp_expr] + _ [alExprToCSharp AlExpr] + deconstruct OptCSharpExpr + CSharpExpr [csharp_expr] + replace [repeat csharp_lang_stmt] + by + CSharpExpr '; +end function + +function alOptElseCSharp AlOptElse [opt al_else] + deconstruct AlOptElse + 'else + AlSubStmt [action_lang_stmt] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct CSharpSubStmts [repeat csharp_lang_stmt] + _ [alToCSharp AlSubStmts] + deconstruct CSharpSubStmts + CSharpSubStmt [csharp_lang_stmt] + replace [opt csharp_else] + by + 'else + CSharpSubStmt +end function + +function alStmtToCSharp3 AlStmt [action_lang_stmt] + deconstruct AlStmt + 'if '( AlExpr [al_expr] ') + AlSubStmt [action_lang_stmt] + AlOptElse [opt al_else] + construct OptCSharpExpr [opt csharp_expr] + _ [alExprToCSharp AlExpr] + deconstruct OptCSharpExpr + CSharpExpr [csharp_expr] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct CSharpSubStmts [repeat csharp_lang_stmt] + _ [alToCSharp AlSubStmts] + deconstruct CSharpSubStmts + CSharpSubStmt [csharp_lang_stmt] + construct OptCSharpElse [opt csharp_else] + _ [alOptElseCSharp AlOptElse] + replace [repeat csharp_lang_stmt] + by + 'if '( CSharpExpr ') + CSharpSubStmt + OptCSharpElse +end function + +function alStmtToCSharp4a AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printi Id [id] '; + replace [repeat 
csharp_lang_stmt] + by + 'Console '. 'Write '( Id '); +end function + +function alStmtToCSharp4b AlStmt [action_lang_stmt] + deconstruct AlStmt + 'prints String [stringlit] '; + replace [repeat csharp_lang_stmt] + by + 'Console '. 'Write '( String '); +end function + +function alStmtToCSharp4c AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printb Id [id] '; + replace [repeat csharp_lang_stmt] + by + '_s '= 'new 'String '( Id ', '0 ', 'pos ') '; + 'Console '. 'Write '( '_s '); +end function + +function alStmtToCSharp4d AlStmt [action_lang_stmt] + deconstruct AlStmt + 'print_token '; + replace [repeat csharp_lang_stmt] + by + '_s '= 'new 'String '( 'data ', 'ts ', 'te '- 'ts ') '; + 'Console '. 'Write '( '_s '); +end function + +function alStmtToCSharp5 AlStmt [action_lang_stmt] + deconstruct AlStmt + '{ AlSubStmts [repeat action_lang_stmt] '} + construct CSharpSubStmts [repeat csharp_lang_stmt] + _ [alToCSharp AlSubStmts] + replace [repeat csharp_lang_stmt] + by + '{ CSharpSubStmts '} +end function + +function alStmtToCSharp6 AlStmt [action_lang_stmt] + deconstruct AlStmt + RagelStmt [al_ragel_stmt] + replace [repeat csharp_lang_stmt] + by + RagelStmt +end function + + +function alToCSharp AlStmts [repeat action_lang_stmt] + deconstruct AlStmts + FirstStmt [action_lang_stmt] Rest [repeat action_lang_stmt] + construct CSharpFirst [repeat csharp_lang_stmt] + _ + [alStmtToCSharp1 FirstStmt] + [alStmtToCSharp2 FirstStmt] + [alStmtToCSharp3 FirstStmt] + [alStmtToCSharp4a FirstStmt] + [alStmtToCSharp4b FirstStmt] + [alStmtToCSharp4c FirstStmt] + [alStmtToCSharp4d FirstStmt] + [alStmtToCSharp5 FirstStmt] + [alStmtToCSharp6 FirstStmt] + construct CSharpRest [repeat csharp_lang_stmt] + _ [alToCSharp Rest] + replace [repeat csharp_lang_stmt] + by + CSharpFirst [. 
CSharpRest] +end function + +rule actionTransCSharp + replace [al_host_block] + '{ AlStmts [repeat action_lang_stmt] '} + construct CSharpStmts [repeat csharp_lang_stmt] + _ [alToCSharp AlStmts] + by + '{ CSharpStmts '} +end rule + +rule condTransCSharp + replace [cond_action_stmt] + 'action Id [id] '{ AlExpr [al_expr] '} + construct OptCSharpExpr [opt csharp_expr] + _ [alExprToCSharp AlExpr] + deconstruct OptCSharpExpr + CSharpExpr [csharp_expr] + by + 'action Id '{ CSharpExpr '} +end rule + +rule machineName + replace $ [machine_stmt] + 'machine _ [id] '; + import TXLargs [repeat stringlit] + Arg1 [stringlit] _ [repeat stringlit] + construct ClassName [id] + _ [unquote Arg1] + by + 'machine ClassName '; +end rule + +function langTransCSharp + replace [program] + Definitions [repeat action_lang_stmt] + '%% + Initializations [repeat action_lang_stmt] + RagelDef [ragel_def] + construct CSharpDefinitions [repeat csharp_lang_stmt] + _ [alToCSharp Definitions] + construct CSharpInitializations [repeat csharp_lang_stmt] + _ [alToCSharp Initializations] + construct NewRagelDef [ragel_def] + RagelDef [actionTransCSharp] [condTransCSharp] [machineName] + import ArrayInits [csharp_statements] + ArrayInitStmts [repeat csharp_lang_stmt] + by + CSharpDefinitions + '%% + ArrayInitStmts [. CSharpInitializations] + NewRagelDef +end function + +function main + replace [program] + P [program] + export ArrayInits [csharp_statements] + _ + by + P [langTransCSharp] +end function diff --git a/test/langtrans_d.sh b/test/langtrans_d.sh new file mode 100755 index 0000000..764afd7 --- /dev/null +++ b/test/langtrans_d.sh @@ -0,0 +1,110 @@ +#!/bin/bash +# + +file=$1 + +[ -f $file ] || exit 1 + +# Get the machine name. +machine=`sed -n 's/^[\t ]*machine[\t ]*\([a-zA-Z_0-9]*\)[\t ]*;[\t ]*$/\1/p' $file` + +# Make a temporary version of the test case using the D language translations. +sed -n '/\/\*/,/\*\//d;p' $file | txl -q stdin langtrans_d.txl > $file.pr + +# Begin writing out the test case. 
+cat << EOF +/* + * @LANG: d + * @GENERATED: yes +EOF + +grep '@ALLOW_GENFLAGS:' $file +grep '@ALLOW_MINFLAGS:' $file + +cat << EOF + */ +import std.stdio; +import std.string; + +class $machine +{ +EOF + +# Write the data declarations +sed -n '/^%%$/q;{s/^/\t/;p}' $file.pr + +# Write out the machine specification. +sed -n '/^%%{$/,/^}%%/{s/^/\t/;p}' $file.pr + +# Write out the init and execute routines. +cat << EOF + int cs; + %% write data; + void init() + { +EOF + +sed -n '0,/^%%$/d; /^%%{$/q; {s/^/\t\t/;p}' $file.pr + +cat << EOF + %% write init; + } + + void exec( char data[] ) + { + char *p = data.ptr; + char *pe = data.ptr + data.length; + char *eof = pe; + char _s[]; + + %% write exec; + } + + void finish( ) + { + if ( cs >= ${machine}_first_final ) + writefln( "ACCEPT" ); + else + writefln( "FAIL" ); + } + +EOF + +# Write out the test data. +sed -n '0,/\/\* _____INPUT_____/d; /_____INPUT_____ \*\//q; p;' $file | awk ' +BEGIN { + print " char[][] inp = [" +} +{ + print " " $0 "," +} +END { + print " ];" + print "" + print " int inplen = " NR ";" +}' + +# Write out the main routine. +cat << EOF +} + +int main( ) +{ + $machine m = new $machine(); + int i; + for ( i = 0; i < m.inplen; i++ ) { + m.init(); + m.exec( m.inp[i] ); + m.finish(); + } + return 0; +} +/* _____OUTPUT_____ +EOF + +# Write out the expected output. +sed -n '0,/\/\* _____OUTPUT_____/d; /_____OUTPUT_____ \*\//q; p;' $file +echo "*/" + +# Don't need this language-specific file anymore. 
+rm $file.pr diff --git a/test/langtrans_d.txl b/test/langtrans_d.txl new file mode 100644 index 0000000..845a003 --- /dev/null +++ b/test/langtrans_d.txl @@ -0,0 +1,299 @@ +include "testcase.txl" + +define d_statements + [repeat d_lang_stmt] +end define + +define d_lang_stmt + [al_ragel_stmt] + | [d_variable_decl] + | [d_expr_stmt] + | [d_if_stmt] + | [EX] '{ [IN] [NL] [d_statements] [EX] '} [IN] [NL] +end define + +define d_variable_decl + [d_type_decl] [id] [opt union] '; [NL] +end define + +define d_type_decl + [al_type_decl] + | 'char '* +end define + +define d_expr_stmt + [d_expr] '; [NL] +end define + +define d_expr + [d_term] [repeat d_expr_extend] +end define + +define d_expr_extend + [al_expr_op] [d_term] +end define + +define d_term + [al_term] + | [id] '( [d_args] ') +end define + +define d_args + [list d_expr] +end define + +define d_sign + '- | '+ +end define + +define d_if_stmt + 'if '( [d_expr] ') [NL] [IN] + [d_lang_stmt] [EX] + [opt d_else] +end define + +define d_else + 'else [NL] [IN] + [d_lang_stmt] [EX] +end define + +define d_lang + [d_statements] + '%% [NL] + [d_statements] + [ragel_def] +end define + +define program + [lang_indep] + | [d_lang] +end define + +redefine al_host_block + '{ [NL] [IN] [al_statements] [EX] '} [NL] + | '{ [NL] [IN] [d_statements] [EX] '} [NL] +end define + +rule ptrTypes + replace [d_type_decl] + 'ptr + by + 'char '* +end rule + +function alStmtToD1 AlStmt [action_lang_stmt] + deconstruct AlStmt + VarDecl [al_variable_decl] + deconstruct VarDecl + Type [al_type_decl] Id [id] OptUnion [opt union] '; + construct DType [d_type_decl] + Type + construct Result [d_variable_decl] + DType [ptrTypes] Id OptUnion '; + replace [repeat d_lang_stmt] + by + Result +end function + +rule alTermToD1 + replace [al_term] + 'first_token_char + by + 'ts '[0] +end rule + +rule alTermToD2 + replace [al_term] + '< _ [al_type_decl] '> '( AlExpr [al_expr] ') + by + '( AlExpr ') +end rule + +function alTermToD + replace [al_term] + AlTerm 
[al_term] + by + AlTerm + [alTermToD1] + [alTermToD2] +end function + +function alExprExtendToD AlExprExtend [repeat al_expr_extend] + deconstruct AlExprExtend + Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend] + construct DRest [repeat d_expr_extend] + _ [alExprExtendToD Rest] + replace [repeat d_expr_extend] + by + Op Term [alTermToD] DRest +end function + +function alExprToD AlExpr [al_expr] + deconstruct AlExpr + ALTerm [al_term] AlExprExtend [repeat al_expr_extend] + construct DExprExtend [repeat d_expr_extend] + _ [alExprExtendToD AlExprExtend] + construct Result [opt d_expr] + ALTerm [alTermToD] DExprExtend + replace [opt d_expr] + by + Result +end function + +function alStmtToD2 AlStmt [action_lang_stmt] + deconstruct AlStmt + AlExpr [al_expr] '; + construct OptDExpr [opt d_expr] + _ [alExprToD AlExpr] + deconstruct OptDExpr + DExpr [d_expr] + replace [repeat d_lang_stmt] + by + DExpr '; +end function + +function alOptElseD AlOptElse [opt al_else] + deconstruct AlOptElse + 'else + AlSubStmt [action_lang_stmt] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct DSubStmts [repeat d_lang_stmt] + _ [alToD AlSubStmts] + deconstruct DSubStmts + DSubStmt [d_lang_stmt] + replace [opt d_else] + by + 'else + DSubStmt +end function + +function alStmtToD3 AlStmt [action_lang_stmt] + deconstruct AlStmt + 'if '( AlExpr [al_expr] ') + AlSubStmt [action_lang_stmt] + AlOptElse [opt al_else] + construct OptDExpr [opt d_expr] + _ [alExprToD AlExpr] + deconstruct OptDExpr + DExpr [d_expr] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct DSubStmts [repeat d_lang_stmt] + _ [alToD AlSubStmts] + deconstruct DSubStmts + DSubStmt [d_lang_stmt] + construct OptDElse [opt d_else] + _ [alOptElseD AlOptElse] + replace [repeat d_lang_stmt] + by + 'if '( DExpr ') + DSubStmt + OptDElse +end function + +function alStmtToD4a AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printi Id [id] '; + replace [repeat d_lang_stmt] + by + 'writef 
'( '"%d" ', Id ') '; +end function + +function alStmtToD4b AlStmt [action_lang_stmt] + deconstruct AlStmt + 'prints String [stringlit] '; + replace [repeat d_lang_stmt] + by + 'writef '( '"%s" ', String ') '; +end function + +function alStmtToD4c AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printb Id [id] '; + replace [repeat d_lang_stmt] + by + '_s '= Id '[0..pos] '; + 'writef '( '"%s" ', '_s ') '; +end function + +function alStmtToD4d AlStmt [action_lang_stmt] + deconstruct AlStmt + 'print_token '; + replace [repeat d_lang_stmt] + by + '_s '= ts '[0..(te-ts)] '; + 'writef '( '"%s" ', '_s ') '; +end function + +function alStmtToD5 AlStmt [action_lang_stmt] + deconstruct AlStmt + '{ AlSubStmts [repeat action_lang_stmt] '} + construct DSubStmts [repeat d_lang_stmt] + _ [alToD AlSubStmts] + replace [repeat d_lang_stmt] + by + '{ DSubStmts '} +end function + +function alStmtToD6 AlStmt [action_lang_stmt] + deconstruct AlStmt + RagelStmt [al_ragel_stmt] + replace [repeat d_lang_stmt] + by + RagelStmt +end function + +function alToD AlStmts [repeat action_lang_stmt] + deconstruct AlStmts + FirstStmt [action_lang_stmt] Rest [repeat action_lang_stmt] + construct DFirst [repeat d_lang_stmt] + _ + [alStmtToD1 FirstStmt] + [alStmtToD2 FirstStmt] + [alStmtToD3 FirstStmt] + [alStmtToD4a FirstStmt] + [alStmtToD4b FirstStmt] + [alStmtToD4c FirstStmt] + [alStmtToD4d FirstStmt] + [alStmtToD5 FirstStmt] + [alStmtToD6 FirstStmt] + construct DRest [repeat d_lang_stmt] + _ [alToD Rest] + replace [repeat d_lang_stmt] + by + DFirst [. 
DRest] +end function + +rule actionTransD + replace [al_host_block] + '{ AlStmts [repeat action_lang_stmt] '} + construct DStmts [repeat d_lang_stmt] + _ [alToD AlStmts] + by + '{ DStmts '} +end rule + +function langTransD + replace [program] + Definitions [repeat action_lang_stmt] + '%% + Initializations [repeat action_lang_stmt] + RagelDef [ragel_def] + construct DDefinitions [repeat d_lang_stmt] + _ [alToD Definitions] + construct DInitializations [repeat d_lang_stmt] + _ [alToD Initializations] + by + DDefinitions + '%% + DInitializations + RagelDef [actionTransD] +end function + +function main + replace [program] + P [program] + by + P [langTransD] +end function diff --git a/test/langtrans_java.sh b/test/langtrans_java.sh new file mode 100755 index 0000000..69a6f90 --- /dev/null +++ b/test/langtrans_java.sh @@ -0,0 +1,106 @@ +#!/bin/bash +# + +file=$1 + +[ -f $file ] || exit 1 +root=${file%.rl} +class=${root}_java + +# Make a temporary version of the test case using the Java language translations. +sed -n '/\/\*/,/\*\//d;p' $file | txl -q stdin langtrans_java.txl - $class > $file.pr + +# Begin writing out the test case. +cat << EOF +/* + * @LANG: java + * @GENERATED: yes +EOF + +grep '@ALLOW_GENFLAGS:' $file +grep '@ALLOW_MINFLAGS:' $file + +cat << EOF + */ + +class $class +{ +EOF + +# Write the data declarations +sed -n '/^%%$/q;{s/^/\t/;p}' $file.pr + +# Write out the machine specification. +sed -n '/^%%{$/,/^}%%/{s/^/\t/;p}' $file.pr + +# Write out the init and execute routines. +cat << EOF + + int cs; + %% write data; + + void init() + { +EOF + +sed -n '0,/^%%$/d; /^%%{$/q; {s/^/\t\t/;p}' $file.pr + +cat << EOF + %% write init; + } + + void exec( char data[], int len ) + { + int p = 0; + int pe = len; + int eof = len; + String _s; + %% write exec; + } + + void finish( ) + { + if ( cs >= ${class}_first_final ) + System.out.println( "ACCEPT" ); + else + System.out.println( "FAIL" ); + } + +EOF + +# Write out the test data. 
+sed -n '0,/\/\* _____INPUT_____/d; /_____INPUT_____ \*\//q; p;' $file | awk ' +BEGIN { + print " static final String inp[] = {" +} +{ + print " " $0 "," +} +END { + print " };" + print "" + print " static final int inplen = " NR ";" +}' + + +# Write out the main routine. +cat << EOF + + public static void main (String[] args) + { + $class machine = new $class(); + for ( int i = 0; i < inplen; i++ ) { + machine.init(); + machine.exec( inp[i].toCharArray(), inp[i].length() ); + machine.finish(); + } + } +} + +EOF + +# Write out the expected output. +sed -n '/\/\* _____OUTPUT_____/,/_____OUTPUT_____ \*\//p;' $file + +# Don't need this language-specific file anymore. +rm $file.pr diff --git a/test/langtrans_java.txl b/test/langtrans_java.txl new file mode 100644 index 0000000..c5cde5d --- /dev/null +++ b/test/langtrans_java.txl @@ -0,0 +1,365 @@ +include "testcase.txl" + +keys + 'boolean 'new +end keys + + +define java_statements + [repeat java_lang_stmt] +end define + +define java_lang_stmt + [al_ragel_stmt] + | [java_variable_decl] + | [java_expr_stmt] + | [java_if_stmt] + | [EX] '{ [IN] [NL] [java_statements] [EX] '} [IN] [NL] +end define + +define java_variable_decl + [java_type_decl] [id] [opt union] '; [NL] +end define + +define java_type_decl + [al_type_decl] + | 'boolean + | 'String +end define + +define java_expr_stmt + [java_expr] '; [NL] +end define + +define java_expr + [java_term] [repeat java_expr_extend] +end define + +define java_expr_extend + [al_expr_op] [java_term] +end define + +define java_term + [al_term] + | [id] [repeat java_dot_id] + | [id] [repeat java_dot_id] '( [java_args] ') + | 'new [java_type_decl] [union] + | 'new [java_type_decl] '( [java_args] ') +end define + +define java_dot_id + '. 
[id] +end define + +define java_args + [list java_expr] +end define + +define java_sign + '- | '+ +end define + +define java_if_stmt + 'if '( [java_expr] ') [NL] [IN] + [java_lang_stmt] [EX] + [opt java_else] +end define + +define java_else + 'else [NL] [IN] + [java_lang_stmt] [EX] +end define + +define java_lang + [java_statements] + '%% [NL] + [java_statements] + [ragel_def] +end define + +define program + [lang_indep] + | [java_lang] +end define + +redefine al_host_block + '{ [NL] [IN] [al_statements] [EX] '} [NL] + | '{ [NL] [IN] [java_statements] [EX] '} [NL] +end define + +redefine cond_action_stmt + 'action [id] '{ [al_expr] '} [NL] + | 'action [id] '{ [java_expr] '} [NL] +end redefine + + +function clearUnion Type [java_type_decl] Id [id] + replace [opt union] + Union [union] + import ArrayInits [java_statements] + Stmts [repeat java_lang_stmt] + export ArrayInits + Id '= 'new Type Union '; Stmts + by + '[] +end function + +rule boolTypes + replace [java_type_decl] + 'bool + by + 'boolean +end rule + +rule ptrTypes + replace [al_type_decl] + 'ptr + by + 'int +end rule + +function alStmtToJava1 AlStmt [action_lang_stmt] + deconstruct AlStmt + VarDecl [al_variable_decl] + deconstruct VarDecl + Type [al_type_decl] Id [id] OptUnion [opt union] '; + construct JavaType [java_type_decl] + Type + construct Result [java_variable_decl] + JavaType [boolTypes] [ptrTypes] Id OptUnion [clearUnion JavaType Id] '; + replace [repeat java_lang_stmt] + by + Result +end function + +rule alTermToJava1 + replace [al_term] + 'first_token_char + by + 'data '[ts] +end rule + +rule alTermToJava2 + replace [al_term] + '< _ [al_type_decl] '> '( AlExpr [al_expr] ') + by + '( AlExpr ') +end rule + +function alTermToJava + replace [al_term] + AlTerm [al_term] + by + AlTerm + [alTermToJava1] + [alTermToJava2] +end function + +function alExprExtendToJava AlExprExtend [repeat al_expr_extend] + deconstruct AlExprExtend + Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend] + construct 
JavaRest [repeat java_expr_extend] + _ [alExprExtendToJava Rest] + replace [repeat java_expr_extend] + by + Op Term [alTermToJava] JavaRest +end function + +function alExprToJava AlExpr [al_expr] + deconstruct AlExpr + ALTerm [al_term] AlExprExtend [repeat al_expr_extend] + construct JavaExprExtend [repeat java_expr_extend] + _ [alExprExtendToJava AlExprExtend] + construct Result [opt java_expr] + ALTerm [alTermToJava] JavaExprExtend + replace [opt java_expr] + by + Result +end function + +function alStmtToJava2 AlStmt [action_lang_stmt] + deconstruct AlStmt + AlExpr [al_expr] '; + construct OptJavaExpr [opt java_expr] + _ [alExprToJava AlExpr] + deconstruct OptJavaExpr + JavaExpr [java_expr] + replace [repeat java_lang_stmt] + by + JavaExpr '; +end function + +function alOptElseJava AlOptElse [opt al_else] + deconstruct AlOptElse + 'else + AlSubStmt [action_lang_stmt] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct JavaSubStmts [repeat java_lang_stmt] + _ [alToJava AlSubStmts] + deconstruct JavaSubStmts + JavaSubStmt [java_lang_stmt] + replace [opt java_else] + by + 'else + JavaSubStmt +end function + +function alStmtToJava3 AlStmt [action_lang_stmt] + deconstruct AlStmt + 'if '( AlExpr [al_expr] ') + AlSubStmt [action_lang_stmt] + AlOptElse [opt al_else] + construct OptJavaExpr [opt java_expr] + _ [alExprToJava AlExpr] + deconstruct OptJavaExpr + JavaExpr [java_expr] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct JavaSubStmts [repeat java_lang_stmt] + _ [alToJava AlSubStmts] + deconstruct JavaSubStmts + JavaSubStmt [java_lang_stmt] + construct OptJavaElse [opt java_else] + _ [alOptElseJava AlOptElse] + replace [repeat java_lang_stmt] + by + 'if '( JavaExpr ') + JavaSubStmt + OptJavaElse +end function + +function alStmtToJava4a AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printi Id [id] '; + replace [repeat java_lang_stmt] + by + 'System '. 'out '. 
'print '( Id '); +end function + +function alStmtToJava4b AlStmt [action_lang_stmt] + deconstruct AlStmt + 'prints String [stringlit] '; + replace [repeat java_lang_stmt] + by + 'System '. 'out '. 'print '( String '); +end function + +function alStmtToJava4c AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printb Id [id] '; + replace [repeat java_lang_stmt] + by + '_s '= 'new 'String '( Id ', '0 ', 'pos ') '; + 'System '. 'out '. 'print '( '_s '); +end function + +function alStmtToJava4d AlStmt [action_lang_stmt] + deconstruct AlStmt + 'print_token '; + replace [repeat java_lang_stmt] + by + '_s '= 'new 'String '( 'data ', 'ts ', 'te '- 'ts ') '; + 'System '. 'out '. 'print '( '_s '); +end function + +function alStmtToJava5 AlStmt [action_lang_stmt] + deconstruct AlStmt + '{ AlSubStmts [repeat action_lang_stmt] '} + construct JavaSubStmts [repeat java_lang_stmt] + _ [alToJava AlSubStmts] + replace [repeat java_lang_stmt] + by + '{ JavaSubStmts '} +end function + +function alStmtToJava6 AlStmt [action_lang_stmt] + deconstruct AlStmt + RagelStmt [al_ragel_stmt] + replace [repeat java_lang_stmt] + by + RagelStmt +end function + + +function alToJava AlStmts [repeat action_lang_stmt] + deconstruct AlStmts + FirstStmt [action_lang_stmt] Rest [repeat action_lang_stmt] + construct JavaFirst [repeat java_lang_stmt] + _ + [alStmtToJava1 FirstStmt] + [alStmtToJava2 FirstStmt] + [alStmtToJava3 FirstStmt] + [alStmtToJava4a FirstStmt] + [alStmtToJava4b FirstStmt] + [alStmtToJava4c FirstStmt] + [alStmtToJava4d FirstStmt] + [alStmtToJava5 FirstStmt] + [alStmtToJava6 FirstStmt] + construct JavaRest [repeat java_lang_stmt] + _ [alToJava Rest] + replace [repeat java_lang_stmt] + by + JavaFirst [. 
JavaRest] +end function + +rule actionTransJava + replace [al_host_block] + '{ AlStmts [repeat action_lang_stmt] '} + construct JavaStmts [repeat java_lang_stmt] + _ [alToJava AlStmts] + by + '{ JavaStmts '} +end rule + +rule condTransJava + replace [cond_action_stmt] + 'action Id [id] '{ AlExpr [al_expr] '} + construct OptJavaExpr [opt java_expr] + _ [alExprToJava AlExpr] + deconstruct OptJavaExpr + JavaExpr [java_expr] + by + 'action Id '{ JavaExpr '} +end rule + +rule machineName + replace $ [machine_stmt] + 'machine _ [id] '; + import TXLargs [repeat stringlit] + Arg1 [stringlit] _ [repeat stringlit] + construct ClassName [id] + _ [unquote Arg1] + by + 'machine ClassName '; +end rule + +function langTransJava + replace [program] + Definitions [repeat action_lang_stmt] + '%% + Initializations [repeat action_lang_stmt] + RagelDef [ragel_def] + construct JavaDefinitions [repeat java_lang_stmt] + _ [alToJava Definitions] + construct JavaInitializations [repeat java_lang_stmt] + _ [alToJava Initializations] + construct NewRagelDef [ragel_def] + RagelDef [actionTransJava] [condTransJava] [machineName] + import ArrayInits [java_statements] + ArrayInitStmts [repeat java_lang_stmt] + by + JavaDefinitions + '%% + ArrayInitStmts [. JavaInitializations] + NewRagelDef +end function + +function main + replace [program] + P [program] + export ArrayInits [java_statements] + _ + by + P [langTransJava] +end function diff --git a/test/langtrans_ruby.sh b/test/langtrans_ruby.sh new file mode 100755 index 0000000..355d8d4 --- /dev/null +++ b/test/langtrans_ruby.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# + +file=$1 + +[ -f $file ] || exit 1 + +# Get the machine name. +machine=`sed -n 's/^[\t ]*machine[\t ]*\([a-zA-Z_0-9]*\)[\t ]*;[\t ]*$/\1/p' \ + $file | tr '[A-Z]' '[a-z]'` + +# Make a temporary version of the test case using the Ruby language translations. +sed -n '/\/\*/,/\*\//d;p' $file | txl -q stdin langtrans_ruby.txl > $file.pr + +# Begin writing out the test case. 
+cat << EOF +# +# @LANG: ruby +# @GENERATED: yes +EOF + +grep '@ALLOW_GENFLAGS:' $file | sed 's/^ *\*/#/' | sed 's/-G.//g' +grep '@ALLOW_MINFLAGS:' $file | sed 's/^ *\*/#/' + +cat << EOF +# + +EOF + +# Write out the machine specification. +sed -n '/^%%{$/,/^}%%/{s/^/\t/;p}' $file.pr + +# Write out the init and execute routines. +cat << EOF + + %% write data; + + def run_machine( data ) + p = 0 + pe = data.length + eof = data.length + cs = 0; +EOF + +# Write the data declarations +sed -n '/^%%$/q;{s/^/\t/;p}' $file.pr + +# Write the data initializations +sed -n '0,/^%%$/d; /^%%{$/q; {s/^/\t\t/;p}' $file.pr + +cat << EOF + + %% write init; + %% write exec; + if cs >= ${machine}_first_final + puts "ACCEPT" + else + puts "FAIL" + end + end + +EOF + +# Write out the test data. +sed -n '0,/\/\* _____INPUT_____/d; /_____INPUT_____ \*\//q; p;' $file | awk ' +BEGIN { + print " inp = [" +} +{ + print " " $0 "," +} +END { + print " ]" + print "" + print " inplen = " NR ";" +}' + + +# Write out the main routine. +cat << EOF + + inp.each { |str| + run_machine(str.unpack("c*")) + } + +EOF + +# Write out the expected output. +echo "=begin _____OUTPUT_____" + +sed -n '/\/\* _____OUTPUT_____/,/_____OUTPUT_____ \*\//{/_____OUTPUT_____/d;p;};' $file + +echo "=end _____OUTPUT_____" + +# Don't need this language-specific file anymore. 
+rm $file.pr diff --git a/test/langtrans_ruby.txl b/test/langtrans_ruby.txl new file mode 100644 index 0000000..42f203d --- /dev/null +++ b/test/langtrans_ruby.txl @@ -0,0 +1,392 @@ +include "testcase.txl" + +keys + 'boolean 'new +end keys + + +define ruby_statements + [repeat ruby_lang_stmt] +end define + +define ruby_lang_stmt + [al_ragel_stmt] + | [ruby_expr_stmt] + | [ruby_if_stmt] + | [EX] 'do [IN] [NL] [ruby_statements] [EX] 'end [IN] [NL] +end define + +define ruby_type_decl + [al_type_decl] + | 'boolean +end define + +define ruby_expr_stmt + [ruby_expr] '; [NL] +end define + +define ruby_expr + [ruby_term] [repeat ruby_expr_extend] +end define + +define ruby_expr_extend + [al_expr_op] [ruby_term] +end define + +define ruby_term + [al_term] + | [stringlit] [union] + | [id] [repeat ruby_dot_id] + | [SPOFF] [id] [repeat ruby_dot_id] '( [SPON] [ruby_args] ') + | [union] +end define + +define ruby_dot_id + '. [id] +end define + +define ruby_args + [list ruby_expr] +end define + +define ruby_sign + '- | '+ +end define + +define ruby_if_stmt + 'if [ruby_expr] [NL] [IN] + [ruby_statements] [EX] + [opt ruby_else] + 'end [NL] +end define + +define ruby_else + 'else [NL] [IN] + [ruby_statements] [EX] +end define + +define ruby_lang + [ruby_statements] + '%% [NL] + [ruby_statements] + [ragel_def] +end define + +define program + [lang_indep] + | [ruby_lang] +end define + +redefine al_host_block + '{ [NL] [IN] [al_statements] [EX] '} [NL] + | '{ [NL] [IN] [ruby_statements] [EX] '} [NL] +end define + +redefine cond_action_stmt + 'action [id] '{ [al_expr] '} [NL] + | 'action [id] '{ [ruby_expr] '} [NL] +end redefine + +function initDecl1 VarDecl [al_variable_decl] + deconstruct VarDecl + 'bool Id [id] '; + replace [repeat ruby_lang_stmt] + by + Id '= 'false '; +end function + +function initDecl2 VarDecl [al_variable_decl] + deconstruct VarDecl + 'char Id [id] '; + replace [repeat ruby_lang_stmt] + by + Id '= ''c' '; +end function + +function initDecl3 VarDecl 
[al_variable_decl] + deconstruct VarDecl + 'int Id [id] '; + replace [repeat ruby_lang_stmt] + by + Id '= '0 '; +end function + +function initDecl4 VarDecl [al_variable_decl] + deconstruct VarDecl + 'ptr Id [id] '; + replace [repeat ruby_lang_stmt] + by + Id '= '-1 '; +end function + +function initDecl5 VarDecl [al_variable_decl] + deconstruct VarDecl + Type [al_type_decl] Id [id] Union [union] '; + replace [repeat ruby_lang_stmt] + by + Id '= '[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] '; +end function + + +function alStmtToRuby1 AlStmt [action_lang_stmt] + deconstruct AlStmt + VarDecl [al_variable_decl] + deconstruct VarDecl + Type [al_type_decl] Id [id] OptUnion [opt union] '; + replace [repeat ruby_lang_stmt] + by + _ [initDecl1 VarDecl] [initDecl2 VarDecl] + [initDecl3 VarDecl] [initDecl4 VarDecl] + [initDecl5 VarDecl] +end function + +rule alTermToRuby1 + replace [al_term] + 'first_token_char + by + 'data '[ts] +end rule + +rule alTermToRuby2 + replace [al_term] + '< _ [al_type_decl] '> '( AlExpr [al_expr] ') + by + '( AlExpr ') +end rule + +function alTermToRuby + replace [al_term] + AlTerm [al_term] + by + AlTerm + [alTermToRuby1] + [alTermToRuby2] +end function + +function alExprExtendToRuby AlExprExtend [repeat al_expr_extend] + deconstruct AlExprExtend + Op [al_expr_op] Term [al_term] Rest [repeat al_expr_extend] + construct RubyRest [repeat ruby_expr_extend] + _ [alExprExtendToRuby Rest] + replace [repeat ruby_expr_extend] + by + Op Term [alTermToRuby] RubyRest +end function + +% Note: this doesn't go into the ( al_expr ) form of al_term. 
+function alExprToRuby AlExpr [al_expr] + deconstruct AlExpr + ALTerm [al_term] AlExprExtend [repeat al_expr_extend] + construct RubyExprExtend [repeat ruby_expr_extend] + _ [alExprExtendToRuby AlExprExtend] + construct Result [opt ruby_expr] + ALTerm [alTermToRuby] RubyExprExtend + replace [opt ruby_expr] + by + Result +end function + +function alStmtToRuby2 AlStmt [action_lang_stmt] + deconstruct AlStmt + AlExpr [al_expr] '; + construct OptRubyExpr [opt ruby_expr] + _ [alExprToRuby AlExpr] + deconstruct OptRubyExpr + RubyExpr [ruby_expr] + replace [repeat ruby_lang_stmt] + by + RubyExpr '; +end function + +function liftBlock + replace [repeat ruby_lang_stmt] + 'do Block [repeat ruby_lang_stmt] 'end + by + Block +end function + +function alOptElseRuby AlOptElse [opt al_else] + deconstruct AlOptElse + 'else + AlSubStmt [action_lang_stmt] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct RubySubStmts [repeat ruby_lang_stmt] + _ [alToRuby AlSubStmts] + deconstruct RubySubStmts + RubySubStmt [ruby_lang_stmt] + replace [opt ruby_else] + by + 'else + RubySubStmts [liftBlock] +end function + +function alStmtToRuby3 AlStmt [action_lang_stmt] + deconstruct AlStmt + 'if '( AlExpr [al_expr] ') + AlSubStmt [action_lang_stmt] + AlOptElse [opt al_else] + construct OptRubyExpr [opt ruby_expr] + _ [alExprToRuby AlExpr] + deconstruct OptRubyExpr + RubyExpr [ruby_expr] + construct AlSubStmts [repeat action_lang_stmt] + AlSubStmt + construct RubySubStmts [repeat ruby_lang_stmt] + _ [alToRuby AlSubStmts] + construct OptRubyElse [opt ruby_else] + _ [alOptElseRuby AlOptElse] + replace [repeat ruby_lang_stmt] + by + 'if RubyExpr + RubySubStmts [liftBlock] + OptRubyElse + 'end +end function + +function alStmtToRuby4a AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printi Id [id] '; + replace [repeat ruby_lang_stmt] + by + 'print '( Id ') '; +end function + +function alStmtToRuby4b AlStmt [action_lang_stmt] + deconstruct AlStmt + 'prints String [stringlit] '; + 
replace [repeat ruby_lang_stmt] + by + 'print '( String ') '; +end function + +function alStmtToRuby4c AlStmt [action_lang_stmt] + deconstruct AlStmt + 'printb Id [id] '; + replace [repeat ruby_lang_stmt] + by + '_a = Id '[0..pos-1] '; + 'print '( '_a '. 'pack '( '"c*" ') ') '; +end function + +function alStmtToRuby4d AlStmt [action_lang_stmt] + deconstruct AlStmt + 'print_token '; + replace [repeat ruby_lang_stmt] + by + '_m = 'data '[ts..te-1] '; + 'print '( '_m '. 'pack '( '"c*" ') ') '; +end function + +function alStmtToRuby5 AlStmt [action_lang_stmt] + deconstruct AlStmt + '{ AlSubStmts [repeat action_lang_stmt] '} + construct RubySubStmts [repeat ruby_lang_stmt] + _ [alToRuby AlSubStmts] + replace [repeat ruby_lang_stmt] + by + 'do RubySubStmts 'end +end function + +function alStmtToRuby6 AlStmt [action_lang_stmt] + deconstruct AlStmt + RagelStmt [al_ragel_stmt] + replace [repeat ruby_lang_stmt] + by + RagelStmt +end function + +rule fixCharLit + replace $ [al_term] + CharLit [charlit] + construct BaseId [id] + 'id + construct Id [id] + BaseId [unquote CharLit] + construct EmptyString [stringlit] + '"" + construct Repl [stringlit] + EmptyString [quote Id] + by + Repl '[0] +end rule + + +function alToRuby AlStmts [repeat action_lang_stmt] + deconstruct AlStmts + FirstStmt [action_lang_stmt] Rest [repeat action_lang_stmt] + construct RubyFirst [repeat ruby_lang_stmt] + _ + [alStmtToRuby1 FirstStmt] + [alStmtToRuby2 FirstStmt] + [alStmtToRuby3 FirstStmt] + [alStmtToRuby4a FirstStmt] + [alStmtToRuby4b FirstStmt] + [alStmtToRuby4c FirstStmt] + [alStmtToRuby4d FirstStmt] + [alStmtToRuby5 FirstStmt] + [alStmtToRuby6 FirstStmt] + [fixCharLit] + construct RubyRest [repeat ruby_lang_stmt] + _ [alToRuby Rest] + replace [repeat ruby_lang_stmt] + by + RubyFirst [. 
RubyRest] +end function + +rule actionTransRuby + replace [al_host_block] + '{ AlStmts [repeat action_lang_stmt] '} + construct RubyStmts [repeat ruby_lang_stmt] + _ [alToRuby AlStmts] + by + '{ RubyStmts '} +end rule + +rule condTransRuby + replace [cond_action_stmt] + 'action Id [id] '{ AlExpr [al_expr] '} + construct OptRubyExpr [opt ruby_expr] + _ [alExprToRuby AlExpr] + deconstruct OptRubyExpr + RubyExpr [ruby_expr] + by + 'action Id '{ RubyExpr '} +end rule + +rule lowercaseMachine + replace $ [machine_stmt] + 'machine Id [id] '; + by + 'machine Id [tolower] '; +end rule + +function langTransRuby + replace [program] + Definitions [repeat action_lang_stmt] + '%% + Initializations [repeat action_lang_stmt] + RagelDef [ragel_def] + construct RubyDefinitions [repeat ruby_lang_stmt] + _ [alToRuby Definitions] + construct RubyInitializations [repeat ruby_lang_stmt] + _ [alToRuby Initializations] + construct NewRagelDef [ragel_def] + RagelDef [actionTransRuby] [condTransRuby] [lowercaseMachine] + import ArrayInits [ruby_statements] + ArrayInitStmts [repeat ruby_lang_stmt] + by + RubyDefinitions + '%% + ArrayInitStmts [. 
RubyInitializations] + NewRagelDef +end function + +function main + replace [program] + P [program] + export ArrayInits [ruby_statements] + _ + by + P [langTransRuby] +end function diff --git a/test/lmgoto.rl b/test/lmgoto.rl new file mode 100644 index 0000000..e8e82a8 --- /dev/null +++ b/test/lmgoto.rl @@ -0,0 +1,198 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using namespace std; + +#define TK_Dlit 192 +#define TK_Slit 193 +#define TK_Float 194 +#define TK_Id 195 +#define TK_NameSep 197 +#define TK_Arrow 211 +#define TK_PlusPlus 212 +#define TK_MinusMinus 213 +#define TK_ArrowStar 214 +#define TK_DotStar 215 +#define TK_ShiftLeft 216 +#define TK_ShiftRight 217 +#define TK_IntegerDecimal 218 +#define TK_IntegerOctal 219 +#define TK_IntegerHex 220 +#define TK_EqualsEquals 223 +#define TK_NotEquals 224 +#define TK_AndAnd 225 +#define TK_OrOr 226 +#define TK_MultAssign 227 +#define TK_DivAssign 228 +#define TK_PercentAssign 229 +#define TK_PlusAssign 230 +#define TK_MinusAssign 231 +#define TK_AmpAssign 232 +#define TK_CaretAssign 233 +#define TK_BarAssign 234 +#define TK_DotDotDot 240 +#define TK_Whitespace 241 +#define TK_Comment 242 + +struct Scanner +{ + int cs, act; + const char *ts, *te; + bool isCxx; + + void token( int tok ); + void run( const char *buf ); +}; + + +%%{ + machine Scanner; + + # Process all comments, relies on isCxx being set. + comment := |* + '*/' { + if ( ! isCxx ) + fgoto main; + else { + cout << "comm char: " << ts[0] << endl; + cout << "comm char: " << ts[1] << endl; + } + }; + + '\n' { + if ( isCxx ) + fgoto main; + else + cout << "comm char: " << ts[0] << endl; + }; + + any { + cout << "comm char: " << ts[0] << endl; + }; + *|; + + main := |* + + # Single and double literals. + ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) { token( TK_Slit );}; + ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) { token( TK_Dlit );}; + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) { token( TK_Id ); }; + + # Floating literals. 
+ fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) { token( TK_Float );}; + + # Integer decimal. Leading part buffered by float. + ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) { token( TK_IntegerDecimal );}; + + # Integer octal. Leading part buffered by float. + ( '0' [0-9]+ [ulUL]{0,2} ) { token( TK_IntegerOctal );}; + + # Integer hex. Leading 0 buffered by float. + ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) { token( TK_IntegerHex );}; + + # Only buffer the second item, first buffered by symbol. + '::' {token( TK_NameSep );}; + '==' {token( TK_EqualsEquals );}; + '!=' {token( TK_NotEquals );}; + '&&' {token( TK_AndAnd );}; + '||' {token( TK_OrOr );}; + '*=' {token( TK_MultAssign );}; + '/=' {token( TK_DivAssign );}; + '%=' {token( TK_PercentAssign );}; + '+=' {token( TK_PlusAssign );}; + '-=' {token( TK_MinusAssign );}; + '&=' {token( TK_AmpAssign );}; + '^=' {token( TK_CaretAssign );}; + '|=' {token( TK_BarAssign );}; + '++' {token( TK_PlusPlus );}; + '--' {token( TK_MinusMinus );}; + '->' {token( TK_Arrow );}; + '->*' {token( TK_ArrowStar );}; + '.*' {token( TK_DotStar );}; + + # Three char compounds, first item already buffered. + '...' { token( TK_DotDotDot );}; + + # Single char symbols. + ( punct - [_"'] ) { token( ts[0] );}; + + # Comments and whitespace. Handle these outside of the machine so that we + # don't end up buffering the comments. 
+ '/*' { isCxx = false; fgoto comment; }; + '//' { isCxx = true; fgoto comment; }; + + ( any - 33..126 )+ { token( TK_Whitespace );}; + + *|; +}%% + +%% write data nofinal; + +void Scanner::token( int tok ) +{ + const char *data = ts; + int len = te - ts; + cout << "<" << tok << "> "; + if ( data != 0 ) { + for ( int i = 0; i < len; i++ ) + cout << data[i]; + } + cout << '\n'; +} + +void Scanner::run( const char *buf ) +{ + int len = strlen( buf ); + %% write init; + const char *p = buf; + const char *pe = buf + len; + const char *eof = pe; + %% write exec; + + if ( cs == Scanner_error ) { + /* Machine failed before finding a token. */ + cout << "PARSE ERROR" << endl; + } +} + +int main() +{ + Scanner scanner; + scanner.run( + "//hello*/\n" + "/*hi there*/ hello 0x88" + ); + return 0; +} + +#ifdef _____OUTPUT_____ +comm char: h +comm char: e +comm char: l +comm char: l +comm char: o +comm char: * +comm char: / +comm char: h +comm char: i +comm char: +comm char: t +comm char: h +comm char: e +comm char: r +comm char: e +<241> +<195> hello +<241> +<220> 0x88 +#endif diff --git a/test/mailbox1.h b/test/mailbox1.h new file mode 100644 index 0000000..e7cd37c --- /dev/null +++ b/test/mailbox1.h @@ -0,0 +1,33 @@ +#ifndef _MAILBOX1_H +#define _MAILBOX1_H + +#include <stdio.h> +#include <string.h> +#include "vector.h" + +struct MBox +{ + int cs; + + Vector<char> headName; + Vector<char> headContent; + + // Initialize the machine. Invokes any init statement blocks. Returns + // nothing; the machine's start state is stored in cs and can be queried + // with finish(). + void init( ); + + // Execute the machine on a block of data. Returns nothing; the state + // reached after processing the data is stored in cs. Call finish() to + // find out whether that is the error state, a non-accepting state or an + // accepting state. + void execute( const char *data, int len ); + + // Indicate that there is no more data. 
Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +#endif diff --git a/test/mailbox1.rl b/test/mailbox1.rl new file mode 100644 index 0000000..ea23173 --- /dev/null +++ b/test/mailbox1.rl @@ -0,0 +1,253 @@ +/* + * @LANG: c++ + * @CFLAGS: -I../aapl + * + * Test works with split code gen. + */ + +/* + * Parses unix mail boxes into headers and bodies. + */ + +#include "mailbox1.h" + +%%{ + machine MBox; + + # Buffer the header names. + action bufHeadName { fsm->headName.append(fc); } + + # Buffer the header content. + action bufHeadContent { fsm->headContent.append(fc); } + + # Terminate a header. If it is an interesting header then prints it. + action finBufHeadContent { + /* Terminate the buffers. */ + fsm->headName.append(0); + fsm->headContent.append(0); + + /* Print the header. Interesting headers. */ + printf("%s:%s\n", fsm->headName.data, fsm->headContent.data); + + /* Clear for the next time we use them. */ + fsm->headName.empty(); + fsm->headContent.empty(); + } + + action msgstart{ + printf("NEW MESSAGE\n"); + } + + # Prints a blank line after the end of the headers of each message. + action blankLine { + printf("\n"); + } + + # Helpers we will use in matching the date section of the from line. + day = /[A-Z][a-z][a-z]/; + month = /[A-Z][a-z][a-z]/; + year = /[0-9][0-9][0-9][0-9]/; + time = /[0-9][0-9]:[0-9][0-9]/ . ( /:[0-9][0-9]/ | '' ); + letterZone = /[A-Z][A-Z][A-Z]/; + numZone = /[+\-][0-9][0-9][0-9][0-9]/; + zone = letterZone | numZone; + dayNum = /[0-9 ][0-9]/; + + # These are the different formats of the date minus an obscure + # type that has a funny string 'remote from xxx' on the end. Taken + # from c-client in the imap-2000 distribution. + date = day . ' ' . month . ' ' . dayNum . ' ' . time . ' ' . + ( year | year . ' ' . zone | zone . ' ' . 
year ); + + # Note the priority assignment on the end of the from line. While we are + # matching the body of a message we may enter into this machine. We will + # not leave the body of the previous message until this entire from line is + # matched. + fromLine = 'From ' . /[^\n]/* . ' ' . date . '\n' @(new_msg,1) @msgstart; + + # The types of characters that can be used as a header name. + hchar = print - [ :]; + + header = + # The name of the header. + hchar+ $bufHeadName . ':' + # The content of the header. Look out for continuations. + . ( (extend - '\n') $bufHeadContent | '\n'. [ \t] @bufHeadContent )* + # Buffer must end with a newline that does not continue. + . '\n' %finBufHeadContent; + + messageLine = ( extend - '\n' )* . '\n' @(new_msg, 0); + + # When we get to the last newline we are still matching messageLine + # so on the last newline it will think we are still in the message. + # We need this because we can't assume that every newline means + # the end of the current message, whereas at the same time we require + # that there be a newline before the fromLine of the next message. + message = ( fromLine . header* . '\n' @blankLine . messageLine* . '\n' ); + + # It's important that the priority in the fromLine gets bumped up + # so that we are able to move to new messages. Otherwise we + # will always stay in the message body of the first message. 
+ main := message*; +}%% + +%% write data; + +void MBox::init( ) +{ + MBox *fsm = this; + %% write init; +} + +void MBox::execute( const char *data, int len ) +{ + MBox *fsm = this; + const char *p = data; + const char *pe = data + len; + %%{ + access fsm->; + write exec; + }%% +} + +int MBox::finish( ) +{ + if ( cs == MBox_error ) + return -1; + if ( cs >= MBox_first_final ) + return 1; + return 0; +} + +MBox mbox; + +void test( const char *buf ) +{ + int len = strlen( buf ); + mbox.init(); + mbox.execute( buf, len ); + if ( mbox.finish() > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( + "From email address goes here Wed Nov 28 13:30:05 2001 -0500\n" + "Header1: this is the header contents\n" + " there is more on the second line\n" + " and more on the third line.\n" + "Header2: slkdj\n" + "\n" + "This is the message data\n" + "\n" + "From email Wed Nov 28 13:30:05 2001 -0500\n" + "Header: \n" + "\n" + "mail message\n" + "\n" + ); + + test( + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "\n" + "There are no headers. \n" + "\n" + "From email Wed Nov 28 13:30:05 EST 2000\n" + "\n" + "There are no headers.\n" + "\n" + ); + + test( + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "Header:alsdj\n" + "\n" + "Header:\n" + "salkfj\n" + "\n" + "There are no headers. \n" + "\n" + ); + + test( + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "Header:alsdj\n" + "\n" + "Header:\n" + "salkfj\n" + "\n" + "There are no headers. \n" + "\n" + ">From user@host.dom Wed Nov 28 13:30:05 2001\n" + "\n" + ); + + test( + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "Header:alsdj\n" + "\n" + "Header:\n" + "salkfj\n" + "\n" + "There are no headers. \n" + "\n" + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "\n" + ); + + test( + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "Header:alsdj\n" + "\n" + "Header:\n" + "salkfj\n" + "\n" + "There are no headers. 
\n" + "\n" + "From user@host.dom Wed Nov 28 13:30:05 2001\n" + "\n" + "\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +NEW MESSAGE +Header1: this is the header contents there is more on the second line and more on the third line. +Header2: slkdj + +NEW MESSAGE +Header: + +ACCEPT +NEW MESSAGE + +NEW MESSAGE + +ACCEPT +NEW MESSAGE +Header:alsdj + +ACCEPT +NEW MESSAGE +Header:alsdj + +ACCEPT +NEW MESSAGE +Header:alsdj + +NEW MESSAGE + +FAIL +NEW MESSAGE +Header:alsdj + +NEW MESSAGE + +ACCEPT +#endif diff --git a/test/mailbox2.rl b/test/mailbox2.rl new file mode 100644 index 0000000..bbaf820 --- /dev/null +++ b/test/mailbox2.rl @@ -0,0 +1,173 @@ +/* + * @LANG: c++ + * @CFLAGS: -I../aapl + */ + +#include <iostream> +#include <string.h> + +using std::cin; +using std::cout; +using std::cerr; +using std::endl; + +%%{ + machine mailbox; + + action prn_char { cout << *p; } + action prn_space { cout << ' '; } + action prn_word { cout.write(ws, p-ws); cout << ' '; } + action prn_addr1 { cout << "| "; cout.write(ws+1, p-ws-2); } + action prn_addr2 { cout << "| "; cout.write(ws, p-ws); } + action prn_tab { cout << '\t'; } + action prn_nl { cout << '\n'; } + action prn_separator { cout << "------\n"; } + action prn_from { cout << "FROM\n"; } + action prn_to { cout << "TO\n"; } + action prn_subj { cout << "SUBJECT\n"; } + + action start_word { ws = p; } + action start_headers { preserve = p; } + action end_headers {preserve = 0;} + + day = upper lower{2}; + month = upper lower{2}; + year = digit{4}; + time = digit{2} ':' digit{2} + ( ':' digit{2} )?; + letterZone = upper{3}; + numZone = [+\-] digit{4}; + zone = letterZone | numZone; + dayNum = ( digit | ' ' ) digit; + + date = day ' ' month ' ' + dayNum ' ' time ' ' + ( + year | + year ' ' zone | + zone ' ' year + ); + + fromLine = 'From ' [^\n]* ' ' + date '\n' @start_headers; + + headerChar = print - [ :]; + headersToPrint = 'From' | + 'To' | 'Subject'; + headersToConsume = + headerChar+ - headersToPrint; + + consumeHeader 
= + headersToConsume ':' + ( + [^\n] | + ( '\n' [ \t] ) + )* + '\n'; + + addrWS = ( [ \t]+ | '\n' [ \t]+ ); + addrComment = '(' [^)]* ')'; + addrWord = [^"'@,<>() \t\n]+; + addrAddr1 = '<' [^>]* '>'; + addrAddr2 = addrWord '@' addrWord; + addrString = + '"' [^"]* '"' | + "'" [^']* "'"; + + addrItem = ( + addrAddr1 %prn_addr1 | + addrAddr2 %prn_addr2 | + addrWord %prn_word | + addrString %prn_word + ) >start_word; + + address = ( + addrWS | + addrComment | + addrItem + )** >prn_tab; + + addrHeader = ( + 'From' %prn_from | + 'To' %prn_to + ) ':' + address ( ',' @prn_nl address )* + '\n' %prn_nl; + + subjectHeader = + 'Subject:' @prn_subj @prn_tab + ' '* <: + ( + [^\n] @prn_char | + ( '\n' [ \t]+ ) %prn_space + )** + '\n' %prn_nl; + + header = consumeHeader | + addrHeader | subjectHeader; + + messageLine = + ( [^\n]* '\n' - fromLine ); + + main := ( + fromLine %prn_separator + header* + '\n' @end_headers + messageLine* + )*; + }%% + +%% write data; + +#define BUFSIZE 8192 + +void test( const char *buf ) +{ + int cs, len = strlen( buf ); + const char *preserve = 0, *ws = 0; + + %% write init; + const char *p = buf; + const char *pe = p + len; + %% write exec; + + if ( cs == mailbox_error ) + cerr << "ERROR" << endl; + + if ( cs < mailbox_first_final ) + cerr << "DID NOT FINISH IN A FINAL STATE" << endl; +} + +int main() +{ + test( + "From user@host.com Wed Nov 28 13:30:05 2001\n" + "From: \"Adrian D. Thurston\" <thurston@complang.org>\n" + "Subject: the squirrel has landed\n" + "\n" + "Message goes here. \n" + "From (trick from line).\n" + "From: not really a header\n" + "\n" + "From user2@host2.com Wed Nov 28 13:30:05 2001\n" + "To: Edgar Allen Poe <ep@net.com> (da man)\n" + "Subject: (no subject) \n" + "\n" + "Message goes here. \n" + "\n" + ); + return 0; +} + +#ifdef _____OUTPUT_____ +------ +FROM + "Adrian D. 
Thurston" | thurston@complang.org +SUBJECT + the squirrel has landed +------ +TO + Edgar Allen Poe | ep@net.com +SUBJECT + (no subject) +#endif diff --git a/test/mailbox3.rl b/test/mailbox3.rl new file mode 100644 index 0000000..8039f80 --- /dev/null +++ b/test/mailbox3.rl @@ -0,0 +1,247 @@ +/* + * @LANG: c++ + * @CFLAGS: -I../aapl + */ + +#include <iostream> +#include <string.h> + +using std::cin; +using std::cout; +using std::cerr; +using std::endl; + +%%{ + machine mailbox; + + action prn_char { cout << *p; } + action prn_space { cout << ' '; } + action prn_word { cout.write(ws, p-ws); cout << ' '; } + action prn_addr1 { cout << "| "; cout.write(ws+1, p-ws-2); } + action prn_addr2 { cout << "| "; cout.write(ws, p-ws); } + action prn_tab { cout << '\t'; } + action prn_nl { cout << '\n'; } + action prn_separator { cout << "------\n"; } + action prn_from { cout << "FROM\n"; } + action prn_to { cout << "TO\n"; } + action prn_subj { cout << "SUBJECT\n"; } + + action start_word { ws = p; } + action start_headers { preserve = p; } + action end_headers {preserve = 0;} + + day = upper lower{2}; + month = upper lower{2}; + year = digit{4}; + time = digit{2} ':' digit{2} + ( ':' digit{2} )?; + letterZone = upper{3}; + numZone = [+\-] digit{4}; + zone = letterZone | numZone; + dayNum = ( digit | ' ' ) digit; + + date = day ' ' month ' ' + dayNum ' ' time ' ' + ( + year | + year ' ' zone | + zone ' ' year + ); + + fromLine = 'From ' [^\n]* ' ' + date '\n' @start_headers; + + headerChar = print - [ :]; + headersToPrint = 'From' | + 'To' | 'Subject'; + headersToConsume = + headerChar+ - headersToPrint; + + action init_hlen {hlen = 0;} + action hlen {hlen++ < 50} + + consumeHeaderBody = + ':' @init_hlen + ( + [^\n] | + ( '\n' [ \t] ) + )* when hlen + '\n'; + + consumeHeader = + headersToConsume consumeHeaderBody; + + addrWS = ( [ \t]+ | '\n' [ \t]+ ); + addrComment = '(' [^)]* ')'; + addrWord = [^"'@,<>() \t\n]+; + addrAddr1 = '<' [^>]* '>'; + addrAddr2 = addrWord '@' addrWord; 
+ addrString = + '"' [^"]* '"' | + "'" [^']* "'"; + + addrItem = ( + addrAddr1 %prn_addr1 | + addrAddr2 %prn_addr2 | + addrWord %prn_word | + addrString %prn_word + ) >start_word; + + address = ( + addrWS | + addrComment | + addrItem + )** >prn_tab; + + addrHeader = ( + 'From' %prn_from | + 'To' %prn_to + ) ':' @init_hlen + ( address ( ',' @prn_nl address )* ) when hlen + '\n' %prn_nl; + + subjectHeader = + 'Subject:' @prn_subj @prn_tab @init_hlen + ( + ' '* <: + ( + [^\n] @prn_char | + ( '\n' [ \t]+ ) %prn_space + )** + ) when hlen + '\n' %prn_nl; + + header = consumeHeader | + addrHeader | subjectHeader; + + messageLine = + ( [^\n]* when hlen '\n' @init_hlen ) - fromLine; + + main := ( + fromLine %prn_separator + header* + '\n' @end_headers @init_hlen + messageLine* + )*; + }%% + +%% write data; + +#define BUFSIZE 8192 + +void test( const char *buf ) +{ + int cs, len = strlen( buf ); + const char *preserve = 0, *ws = 0; + int hlen = 0; + + %% write init; + const char *p = buf; + const char *pe = p + len; + %% write exec; + + if ( cs < mailbox_first_final ) { + cout << endl << endl; + cout << "DID NOT FINISH IN A FINAL STATE" << endl; + } +} + +int main() +{ + test( + "From user@host.com Wed Nov 28 13:30:05 2001\n" + "From: \"Adrian D. Thurston\" <thurston@complang.org>\n" + "Subject: the squirrel has landed\n" + "\n" + "Message goes here. \n" + "From (trick from line).\n" + "From: not really a header\n" + "\n" + "From user2@host2.com Wed Nov 28 13:30:05 2001\n" + "To: \"(kill 1)\" Edgar Allen Poe <ep@net.com> (da man)\n" + "Subject: (no subject) this is a really long subject which should fail the length constraint \n" + "Other: 0123456789\n" + "\n" + "Message goes here. \n" + "\n" + ); + test( + "From user@host.com Wed Nov 28 13:30:05 2001\n" + "To: \"(kill 2)\" some guy <sg@net.com>\n" + "From: \"Adrian D. 
Thurston this name is far too long\" <thurston@complang.org>\n" + "Subject: the squirrel has landed\n" + "\n" + "From user2@host2.com Wed Nov 28 13:30:05 2001\n" + "To: Edgar Allen Poe <ep@net.com> (da man)\n" + "Subject: (no subject) \n" + "\n" + ); + test( + "From user@host.com Wed Nov 28 13:30:05 2001\n" + "To: \"(kill 3)\" some guy <sg@net.com>\n" + "From: \"Adrian D. Thurston This name is fore sure absolutely too long\" <t@cs.ca>\n" + "Subject: the squirrel has landed\n" + "\n" + ); + test( + "From user@host.com Wed Nov 28 13:30:05 2001\n" + "From: \"Adrian D. Thurston \" <t@cs.ca>\n" + "Subject: (kill 4) the squirrel has landed\n" + "Other: This is another header field, not interpreted, that is too long\n" + "\n" + ); + test( + "From user@host.com Wed Nov 28 13:30:05 2001\n" + "From: \"Adrian D. Thurston \" <t@cs.ca>\n" + "Subject: (kill 5)the squirrel has landed\n" + "\n" + "This message line is okay.\n" + "But this message line is far too long and will cause an error.\n" + ); + return 0; +} + +#ifdef _____OUTPUT_____ +------ +FROM + "Adrian D. Thurston" | thurston@complang.org +SUBJECT + the squirrel has landed +------ +TO + "(kill 1)" Edgar Allen Poe | ep@net.com +SUBJECT + (no subject) this is a really long subject whic + +DID NOT FINISH IN A FINAL STATE +------ +TO + "(kill 2)" some guy | sg@net.com +FROM + "Adrian D. Thurston this name is far too long" + +DID NOT FINISH IN A FINAL STATE +------ +TO + "(kill 3)" some guy | sg@net.com +FROM + + +DID NOT FINISH IN A FINAL STATE +------ +FROM + "Adrian D. Thurston " | t@cs.ca +SUBJECT + (kill 4) the squirrel has landed + + +DID NOT FINISH IN A FINAL STATE +------ +FROM + "Adrian D. 
Thurston " | t@cs.ca +SUBJECT + (kill 5)the squirrel has landed + + +DID NOT FINISH IN A FINAL STATE +#endif diff --git a/test/minimize1.rl b/test/minimize1.rl new file mode 100644 index 0000000..c550ebb --- /dev/null +++ b/test/minimize1.rl @@ -0,0 +1,81 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +struct min +{ + int cs; +}; + +%%{ + machine min; + variable cs fsm->cs; + + action a_or_b { printf("a or b\n"); } + + main := ( + ( 'a' . [ab]* @a_or_b ) | + ( 'b' . [ab]* @a_or_b ) + ) . '\n'; +}%% + +%% write data; + +void min_init( struct min *fsm ) +{ + %% write init; +} + +void min_execute( struct min *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int min_finish( struct min *fsm ) +{ + if ( fsm->cs == min_error ) + return -1; + if ( fsm->cs >= min_first_final ) + return 1; + return 0; +} + +struct min fsm; + +void test( char *buf ) +{ + int len = strlen( buf ); + min_init( &fsm ); + min_execute( &fsm, buf, len ); + if ( min_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( "aaaaaa\n" ); + test( "a\n" ); + test( "abc\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +a or b +a or b +a or b +a or b +a or b +ACCEPT +ACCEPT +a or b +FAIL +#endif diff --git a/test/patact.rl b/test/patact.rl new file mode 100644 index 0000000..864299d --- /dev/null +++ b/test/patact.rl @@ -0,0 +1,100 @@ +/* + * @LANG: indep + */ + +char comm; +int top; +int stack[32]; +ptr ts; +ptr te; +int act; +int val; +%% +%%{ + machine patact; + + other := |* + [a-z]+ => { prints "word\n"; }; + [0-9]+ => { prints "num\n"; }; + [\n ] => { prints "space\n"; }; + *|; + + exec_test := |* + [a-z]+ => { prints "word (w/lbh)\n"; fexec te-1; fgoto other; }; + [a-z]+ ' foil' => { prints "word (c/lbh)\n"; }; + [\n ] => { prints "space\n"; }; + '22' => { prints "num (w/switch)\n"; }; + [0-9]+ => { prints "num (w/switch)\n"; fexec te-1; fgoto other;}; + [0-9]+ ' 
foil' => {prints "num (c/switch)\n"; }; + '!';# => { prints "immdiate\n"; fgoto exec_test; }; + *|; + + semi := |* + ';' => { prints "in semi\n"; fgoto main; }; + *|; + + main := |* + [a-z]+ => { prints "word (w/lbh)\n"; fhold; fgoto other; }; + [a-z]+ ' foil' => { prints "word (c/lbh)\n"; }; + [\n ] => { prints "space\n"; }; + '22' => { prints "num (w/switch)\n"; }; + [0-9]+ => { prints "num (w/switch)\n"; fhold; fgoto other;}; + [0-9]+ ' foil' => {prints "num (c/switch)\n"; }; + ';' => { prints "going to semi\n"; fhold; fgoto semi;}; + '!' => { prints "immdiate\n"; fgoto exec_test; }; + *|; +}%% +/* _____INPUT_____ +"abcd foix\n" +"abcd\nanother\n" +"123 foix\n" +"!abcd foix\n" +"!abcd\nanother\n" +"!123 foix\n" +";" +_____INPUT_____ */ +/* _____OUTPUT_____ +word (w/lbh) +word +space +word +space +ACCEPT +word (w/lbh) +word +space +word +space +ACCEPT +num (w/switch) +num +space +word +space +ACCEPT +immdiate +word (w/lbh) +word +space +word +space +ACCEPT +immdiate +word (w/lbh) +word +space +word +space +ACCEPT +immdiate +num (w/switch) +num +space +word +space +ACCEPT +going to semi +in semi +ACCEPT +_____OUTPUT_____ */ + diff --git a/test/range.rl b/test/range.rl new file mode 100644 index 0000000..43e6214 --- /dev/null +++ b/test/range.rl @@ -0,0 +1,74 @@ +/* + * @LANG: c + */ + +#include <stdio.h> +#include <string.h> + +struct range +{ + int cs; +}; + +%%{ + machine range; + variable cs fsm->cs; + + main := ( 'a' .. 'c' | 'c' .. 'e' | 'm' .. 'n' | 'a' .. 
'z' ) '\n'; +}%% + +%% write data; + +void range_init( struct range *fsm ) +{ + %% write init; +} + +void range_execute( struct range *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int range_finish( struct range *fsm ) +{ + if ( fsm->cs == range_error ) + return -1; + if ( fsm->cs >= range_first_final ) + return 1; + return 0; +} + +struct range fsm; + +void test( char *buf ) +{ + int len = strlen( buf ); + range_init( &fsm ); + range_execute( &fsm, buf, len ); + if ( range_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( "a\n" ); + test( "z\n" ); + test( "g\n" ); + test( "no\n" ); + test( "1\n" ); + + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +ACCEPT +FAIL +FAIL +#endif diff --git a/test/recdescent1.rl b/test/recdescent1.rl new file mode 100644 index 0000000..1ffca28 --- /dev/null +++ b/test/recdescent1.rl @@ -0,0 +1,128 @@ +/* + * @LANG: c + * Test growable stack. 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+%%{
+	machine recdescent;
+
+	# Runs before every fcall stores its return state: double the call
+	# stack when it is full.
+	prepush {
+		if ( top == stack_size ) {
+			printf( "growing stack\n" );
+			/* Never compute a zero capacity: 0 * 2 would leave no room
+			 * for the entry that is about to be pushed. */
+			stack_size = top > 0 ? top * 2 : 1;
+			stack = (int*)realloc( stack, sizeof(int)*stack_size );
+		}
+	}
+
+	# Runs after every fret has popped its return state: halve the call
+	# stack once less than a quarter of it is in use.
+	postpop {
+		if ( stack_size > (top * 4) ) {
+			/* Keep at least one slot. realloc() with a size of zero is
+			 * implementation-defined and may free the block, which would
+			 * leave the next push writing through a dead pointer. */
+			stack_size = top > 0 ? top * 2 : 1;
+			stack = (int*)realloc( stack, sizeof(int)*stack_size );
+			printf( "shrinking stack\n" );
+		}
+	}
+
+	# Remember where the current number/identifier starts.
+	action item_start { item = p; }
+
+	# Print the number/identifier that has just ended.
+	action item_finish
+	{
+		printf( "item: " );
+		fwrite( item, 1, p-item, stdout );
+		printf( "\n" );
+	}
+
+	# '{' recurses into main; the matching '}' is consumed after the return.
+	action call_main
+	{
+		printf( "calling main\n" );
+		fcall main;
+	}
+
+	# '}' returns to the caller, or stops the machine when nothing is open.
+	action return_main
+	{
+		if ( top == 0 ) {
+			printf( "STRAY CLOSE\n" );
+			fbreak;
+		}
+
+		printf( "returning from main\n" );
+		/* Hold the '}' so the caller's '{' ... '}' pattern can match it. */
+		fhold;
+		fret;
+	}
+
+	id = [a-zA-Z_]+;
+	number = [0-9]+;
+	ws = [ \t\n]+;
+
+	main := (
+		ws |
+		( number | id ) >item_start %item_finish |
+
+		'{' @call_main '}' |
+
+		'}' @return_main
+	)**;
+}%%
+
+%% write data;
+
+/* Run the machine over buf with a heap allocated call stack that the
+ * prepush/postpop blocks above grow and shrink on demand. */
+void test( char *buf )
+{
+	int cs;
+	int *stack;
+	int top, stack_size;
+	char *p, *pe, *eof, *item = 0;
+
+	int len = strlen( buf );
+
+	%% write init;
+
+	stack_size = 1;
+	stack = (int*)malloc( sizeof(int) * stack_size );
+
+	p = buf;
+	pe = buf + len;
+	eof = pe;
+
+	%% write exec;
+
+	if ( cs == recdescent_error ) {
+		/* Machine failed before finding a token. */
+		printf( "PARSE ERROR\n" );
+	}
+
+	/* The stack is private to this run; release it. */
+	free( stack );
+}
+
+int main()
+{
+	test( "88 foo { 99 {{{{}}}}{ } }");
+	test( "76 } sadf");
+	return 0;
+}
+
+#ifdef _____OUTPUT_____
+item: 88
+item: foo
+calling main
+item: 99
+calling main
+growing stack
+calling main
+growing stack
+calling main
+calling main
+growing stack
+returning from main
+returning from main
+returning from main
+returning from main
+shrinking stack
+calling main
+returning from main
+returning from main
+shrinking stack
+item: 76
+STRAY CLOSE
+#endif
diff --git a/test/recdescent2.rl b/test/recdescent2.rl
new file mode 100644
index 0000000..59c4586
--- /dev/null
+++ b/test/recdescent2.rl
@@ -0,0 +1,116 @@
+/*
+ * @LANG: java
+ */
+
+class recdescent2
+{
+	%%{
+		machine recdescent;
+
+		prepush {
+			if ( top == stack_size ) {
+				System.out.print( "growing stack\n" );
+				stack_size = top * 2;
+				// Don't actually bother to resize here, but we do print messages.
+				//stack = (int*)realloc( stack, sizeof(int)*stack_size );
+			}
+		}
+
+		postpop {
+			if ( stack_size > (top * 4) ) {
+				stack_size = top * 2;
+				// Don't actually bother to resize here, but we do print messages.
+ //stack = (int*)realloc( stack, sizeof(int)*stack_size ); + System.out.print( "shrinking stack\n" ); + } + } + + action item_start { item = p; } + + action item_finish + { + String item_data = new String ( data, item, p-item ); + System.out.print( "item: " ); + System.out.print( item_data ); + System.out.print( "\n" ); + } + + action call_main + { + System.out.print( "calling main\n" ); + fcall main; + } + + action return_main + { + if ( top == 0 ) { + System.out.print( "STRAY CLOSE\n" ); + fbreak; + } + + System.out.print( "returning from main\n" ); + fhold; + fret; + } + + id = [a-zA-Z_]+; + number = [0-9]+; + ws = [ \t\n]+; + + main := ( + ws | + ( number | id ) >item_start %item_finish | + + '{' @call_main '}' | + + '}' @return_main + )**; + }%% + + %% write data; + + static void test( char data[] ) + { + int cs, p = 0, pe = data.length, eof = data.length, item = 0; + int stack[] = new int[1024]; + int stack_size = 1; + int top; + + %% write init; + %% write exec; + + if ( cs == recdescent_error ) + System.out.println( "SCANNER ERROR" ); + } + + public static void main( String args[] ) + { + test( "88 foo { 99 {{{{}}}}{ } }".toCharArray() ); + test( "76 } sadf".toCharArray() ); + } +} + +/* _____OUTPUT_____ +item: 88 +item: foo +calling main +item: 99 +calling main +growing stack +calling main +growing stack +calling main +calling main +growing stack +returning from main +returning from main +returning from main +returning from main +shrinking stack +calling main +returning from main +returning from main +shrinking stack +item: 76 +STRAY CLOSE +*/ diff --git a/test/recdescent3.rl b/test/recdescent3.rl new file mode 100644 index 0000000..1216b43 --- /dev/null +++ b/test/recdescent3.rl @@ -0,0 +1,117 @@ +# +# @LANG: ruby +# + +%%{ + machine recdescent3; + + prepush { + if top == stack_size + print( "growing stack\n" ); + stack_size = top * 2; + # Don't actually bother to resize here, but we do print messages. 
+ # stack = (int*)realloc( stack, sizeof(int)*stack_size ); + end + } + + postpop { + if stack_size > (top * 4) + print( "shrinking stack\n" ); + stack_size = top * 2; + # Don't actually bother to resize here, but we do print messages. + # stack = (int*)realloc( stack, sizeof(int)*stack_size ); + end + } + + action item_start { item = p; } + + action item_finish + { + print( "item: " ); + print( data[item..p-1] ); + print( "\n" ); + } + + action call_main + { + print( "calling main\n" ); + fcall main; + } + + action return_main + { + if top == 0 + print( "STRAY CLOSE\n" ); + fbreak; + end + + print( "returning from main\n" ); + fhold; + fret; + } + + id = [a-zA-Z_]+; + number = [0-9]+; + ws = [ \t\n]+; + + main := ( + ws | + ( number | id ) >item_start %item_finish | + + '{' @call_main '}' | + + '}' @return_main + )**; +}%% + +%% write data; + +def run_machine( data ) + item = 0; + p = 0; + pe = data.length; + eof = pe; + cs = 0; + stack = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]; + stack_size = 1; + top = 0; + + %% write init; + %% write exec; + + if cs == recdescent3_error + puts "SCANNER_ERROR" + end +end + +inp = [ + "88 foo { 99 {{{{}}}}{ } }", + "76 } sadf" +] + +inp.each { |str| run_machine(str) } + +=begin _____OUTPUT_____ +item: 88 +item: foo +calling main +item: 99 +calling main +growing stack +calling main +growing stack +calling main +calling main +growing stack +returning from main +returning from main +returning from main +returning from main +shrinking stack +calling main +returning from main +returning from main +shrinking stack +item: 76 +STRAY CLOSE +=end _____OUTPUT_____ diff --git a/test/repetition.rl b/test/repetition.rl new file mode 100644 index 0000000..328cfa9 --- /dev/null +++ b/test/repetition.rl @@ -0,0 +1,293 @@ +/* + * @LANG: c++ + */ + +/* Test repeptition operators. 
*/ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +using namespace std; + +struct Rep +{ + int cs; + + int init( ); + int execute( const char *data, int len ); + int finish( ); +}; + +%%{ + machine Rep; + + action begin { cout << "begin" << endl; } + action in { cout << "in" << endl; } + action end { cout << "end" << endl; } + + a = 'a' >begin @in %end; + b = 'b' >begin @in %end; + c = 'c' >begin @in %end; + d = 'd' >begin @in %end; + + main := + ( a {5} '\n' )* '-\n' + ( b {,5} '\n' )* '-\n' + ( c {5,} '\n' )* '-\n' + ( d {2,5} '\n' )*; +}%% + +%% write data; + +int Rep::init( ) +{ + %% write init; + return 1; +} + +int Rep::execute( const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; + + if ( cs == Rep_error ) + return -1; + if ( cs >= Rep_first_final ) + return 1; + return 0; +} + +int Rep::finish( ) +{ + if ( cs == Rep_error ) + return -1; + if ( cs >= Rep_first_final ) + return 1; + return 0; +} + +void test( const char *buf ) +{ + Rep rep; + int len = strlen( buf ); + rep.init(); + rep.execute( buf, len ); + if ( rep.finish() > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + +int main() +{ + test( + "aaaaa\n" + "-\n" + "\n" + "b\n" + "bb\n" + "bbb\n" + "bbbb\n" + "bbbbb\n" + "-\n" + "ccccc\n" + "ccccccc\n" + "cccccccccc\n" + "-\n" + "dd\n" + "ddd\n" + "dddd\n" + "ddddd\n" + ); + + test( + "a\n" + "-\n" + "b\n" + "-\n" + "c\n" + "-\n" + "d\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in 
+end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +begin +in +end +ACCEPT +begin +in +FAIL +#endif diff --git a/test/rlscan.rl b/test/rlscan.rl new file mode 100644 index 0000000..448b979 --- /dev/null +++ b/test/rlscan.rl @@ -0,0 +1,289 @@ +/* + * Lexes Ragel input files. + * + * @LANG: c++ + * + * Test works with split code gen. + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +using namespace std; + +void escapeXML( const char *data ) +{ + while ( *data != 0 ) { + switch ( *data ) { + case '<': cout << "<"; break; + case '>': cout << ">"; break; + case '&': cout << "&"; break; + default: cout << *data; break; + } + data += 1; + } +} + +void escapeXML( char c ) +{ + switch ( c ) { + case '<': cout << "<"; break; + case '>': cout << ">"; break; + case '&': cout << "&"; break; + default: cout << c; break; + } +} + +void escapeXML( const char *data, int len ) +{ + for ( const char *end = data + len; data != end; data++ ) { + switch ( *data ) { + case '<': cout << "<"; break; + case '>': cout << ">"; break; + case '&': cout << "&"; break; + default: cout << *data; break; + } + } +} + +inline void write( const char *data ) +{ + cout << data; +} + +inline void write( char c ) +{ + cout << c; +} + +inline void write( const char *data, int len ) +{ + cout.write( data, len ); +} + + +%%{ + machine RagelScan; + + word = [a-zA-Z_][a-zA-Z_0-9]*; + integer = [0-9]+; + hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*; + + default = ^0; + EOF = 0; + + # Handles comments in outside code and inline blocks. 
+ c_comment := + ( default* :>> '*/' ) + ${ escapeXML( fc ); } + @{ fret; }; + + action emit { + escapeXML( ts, te-ts ); + } + + # + # Inline action code + # + + ilscan := |* + + "'" ( [^'\\] | /\\./ )* "'" => emit; + '"' ( [^"\\] | /\\./ )* '"' => emit; + '/*' { + write( "/*" ); + fcall c_comment; + }; + '//' [^\n]* '\n' => emit; + + '{' { + write( '{' ); + inline_depth += 1; + }; + + '}' { + write( '}' ); + /* If dropping down to the last } then return + * to ragel code. */ + if ( --inline_depth == 0 ) { + write( "</inline>\n" ); + fgoto rlscan; + } + }; + + default => { escapeXML( *ts ); }; + *|; + + # + # Ragel Tokens + # + + rlscan := |* + '}%%' { + if ( !single_line ) { + write( "</section>\n" ); + fgoto main; + } + }; + + '\n' { + if ( single_line ) { + write( "</section>\n" ); + fgoto main; + } + }; + + # Word + word { + write( "<word>" ); + write( ts, te-ts ); + write( "</word>\n" ); + }; + + # Decimal integer. + integer { + write( "<int>" ); + write( ts, te-ts ); + write( "</int>\n" ); + }; + + # Hexidecimal integer. + hex { + write( "<hex>" ); + write( ts, te-ts ); + write( "</hex>\n" ); + }; + + # Consume comments. + '#' [^\n]* '\n'; + + # Single literal string. + "'" ( [^'\\] | /\\./ )* "'" { + write( "<single_lit>" ); + escapeXML( ts, te-ts ); + write( "</single_lit>\n" ); + }; + + # Double literal string. + '"' ( [^"\\] | /\\./ )* '"' { + write( "<double_lit>" ); + escapeXML( ts, te-ts ); + write( "</double_lit>\n" ); + }; + + # Or literal. + '[' ( [^\]\\] | /\\./ )* ']' { + write( "<or_lit>" ); + escapeXML( ts, te-ts ); + write( "</or_lit>\n" ); + }; + + # Regex Literal. + '/' ( [^/\\] | /\\./ ) * '/' { + write( "<re_lit>" ); + escapeXML( ts, te-ts ); + write( "</re_lit>\n" ); + }; + + # Open an inline block + '{' { + inline_depth = 1; + write( "<inline>{" ); + fgoto ilscan; + }; + + punct { + write( "<symbol>" ); + escapeXML( fc ); + write( "</symbol>\n" ); + }; + + default; + *|; + + # + # Outside code. 
+ # + + main := |* + + "'" ( [^'\\] | /\\./ )* "'" => emit; + '"' ( [^"\\] | /\\./ )* '"' => emit; + + '/*' { + escapeXML( ts, te-ts ); + fcall c_comment; + }; + + '//' [^\n]* '\n' => emit; + + '%%{' { + write( "<section>\n" ); + single_line = false; + fgoto rlscan; + }; + + '%%' { + write( "<section>\n" ); + single_line = true; + fgoto rlscan; + }; + + default { + escapeXML( *ts ); + }; + + # EOF. + EOF; + *|; +}%% + +%% write data nofinal; + +void test( const char *data ) +{ + std::ios::sync_with_stdio(false); + + int cs, act; + const char *ts, *te; + int stack[1], top; + + bool single_line = false; + int inline_depth = 0; + + %% write init; + + /* Read in a block. */ + const char *p = data; + const char *pe = data + strlen( data ); + const char *eof = pe; + %% write exec; + + if ( cs == RagelScan_error ) { + /* Machine failed before finding a token. */ + cerr << "PARSE ERROR" << endl; + exit(1); + } +} + +#define BUFSIZE 2048 + +int main() +{ + std::ios::sync_with_stdio(false); + + test("hi %%{ /'}%%'/ { /*{*/ {} } + '\\'' }%%there\n"); + + return 0; +} +#ifdef _____OUTPUT_____ +hi <section> +<re_lit>/'}%%'/</re_lit> +<inline>{ /*{*/ {} }</inline> +<symbol>+</symbol> +<single_lit>'\''</single_lit> +</section> +there +#endif diff --git a/test/ruby1.rl b/test/ruby1.rl new file mode 100644 index 0000000..e2f4bc9 --- /dev/null +++ b/test/ruby1.rl @@ -0,0 +1,56 @@ +# +# @LANG: ruby +# +# Test the host language scanning for ruby. 
+# + +# %%{ +a = 1 +b = /%%\{\}/; + +%%{ + machine ruby1; + + main := lower+ digit+ '\n' @{ + + # } + c = 1 + d = /\}/ + puts "NL" + }; +}%% + +# %%{ +e = 1 +f = /%%\{\}/; + +%% write data; + +# %%{ +g = 1 +h = /%%\{\}/; + +def run_machine( data ) + p = 0; + pe = data.length + cs = 0 + + %% write init; + %% write exec; + if cs >= ruby1_first_final + puts "ACCEPT" + else + puts "FAIL" + end +end + +inp = [ + "abc1231\n", +] + +inp.each { |str| run_machine(str) } + +=begin _____OUTPUT_____ +NL +ACCEPT +=end _____OUTPUT_____ diff --git a/test/runtests.in b/test/runtests.in new file mode 100755 index 0000000..2d6cae7 --- /dev/null +++ b/test/runtests.in @@ -0,0 +1,335 @@ +#!/bin/bash + +# +# Copyright 2006-2009 Adrian Thurston <thurston@complang.org> +# + +# This file is part of Ragel. +# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +while getopts "gcnmleT:F:G:P:CDJRAZ" opt; do + case $opt in + T|F|G|P) + genflags="$genflags -$opt$OPTARG" + options="$options -$opt$OPTARG" + ;; + n|m|l|e) + minflags="$minflags -$opt" + options="$options -$opt" + ;; + c) + compile_only="true" + options="$options -$opt" + ;; + g) + allow_generated="true" + ;; + C|D|J|R|A|Z) + langflags="$langflags -$opt" + ;; + esac +done + +[ -z "$minflags" ] && minflags="-n -m -l -e" +[ -z "$genflags" ] && genflags="-T0 -T1 -F0 -F1 -G0 -G1 -G2" +[ -z "$langflags" ] && langflags="-C -D -J -R -A -Z" + +shift $((OPTIND - 1)); + +[ -z "$*" ] && set -- *.rl + +config=../src/config.h +ragel=../ragel/ragel + +cxx_compiler="@CXX@" +c_compiler="@CC@" +objc_compiler="@GOBJC@" +d_compiler="@GDC@" +java_compiler="@JAVAC@" +txl_engine="@TXL@" +ruby_engine="@RUBY@" +csharp_compiler="@GMCS@" +go_compiler="@GOBIN@" + +function test_error +{ + exit 1; +} + +# split_objs="" +# if test $split_iters != "$gen_opt"; then +# n=0; +# while test $n -lt $split_iters; do +# part_root=${root}_`awk 'BEGIN { +# width = 0; +# high = '$split_iters' - 1; +# while ( high > 0 ) { +# width = width + 1; +# high = int(high / 10); +# } +# suffFormat = "%" width "." width "d\n"; +# printf( suffFormat, '$n' ); +# exit 0; +# }'` +# part_src=${part_root}.c +# part_bin=${part_root}.o +# echo "$compiler -c $cflags -o $part_bin $part_src" +# if ! $compiler -c $cflags -o $part_bin $part_src; then +# test_error; +# fi +# split_objs="$split_objs $part_bin" +# n=$((n+1)) +# done +# fi + +function run_test() +{ + echo "$ragel $lang_opt $min_opt $gen_opt -o $code_src $test_case" + if ! 
$ragel $lang_opt $min_opt $gen_opt -o $code_src $test_case; then + test_error; + fi + + out_args="" + [ $lang != java ] && out_args="-o ${binary}"; + [ $lang == csharp ] && out_args="-out:${binary}"; + + # Ruby doesn't need to be compiled. + if [ $lang != ruby ]; then + echo "$compiler ${cflags} ${out_args} ${code_src}" + if ! $compiler ${cflags} ${out_args} ${code_src}; then + test_error; + fi + fi + + if [ "$compile_only" != "true" ]; then + echo -n "running $root ... "; + + exec_cmd=./$binary + [ $lang = java ] && exec_cmd="java ${root}" + [ $lang = ruby ] && exec_cmd="ruby ${code_src}" + [ $lang = csharp ] && [ "$csharp_compiler" = gmcs ] && exec_cmd="mono ${exec_cmd}" + + $exec_cmd 2>&1 > $output; + if diff --strip-trailing-cr $expected_out $output > /dev/null; then + echo "passed"; + else + echo "FAILED"; + test_error; + fi; + fi +} + +for test_case; do + root=${test_case%.rl}; + + if ! [ -f "$test_case" ]; then + echo "runtests: not a file: $test_case"; >&2 + exit 1; + fi + + # Check if we should ignore the test case + ignore=`sed '/@IGNORE:/s/^.*: *//p;d' $test_case` + if [ "$ignore" = yes ]; then + continue; + fi + + # If the generated flag is given make sure that the test case is generated. 
+ is_generated=`sed '/@GENERATED:/s/^.*: *//p;d' $test_case` + if [ "$is_generated" = yes ] && [ "$allow_generated" != true ]; then + continue; + fi + + expected_out=$root.exp; + sed '1,/_____OUTPUT_____/d;$d' $test_case > $expected_out + + lang=`sed '/@LANG:/s/^.*: *//p;d' $test_case` + if [ -z "$lang" ]; then + echo "$test_case: language unset"; >&2 + exit 1; + fi + + case $lang in + c++) + lang_opt=-C; + code_suffix=cpp; + compiler=$cxx_compiler; + cflags="-pedantic -ansi -Wall -O3" + ;; + d) + lang_opt=-D; + code_suffix=d; + compiler=$d_compiler; + cflags="-Wall -O3" + ;; + c) + lang_opt=-C; + code_suffix=c; + compiler=$c_compiler; + cflags="-pedantic -ansi -Wall -O3" + ;; + obj-c) + lang_opt=-C; + code_suffix=m; + compiler=$objc_compiler + cflags="-Wall -O3 -fno-strict-aliasing -lobjc" + ;; + java) + lang_opt=-J; + code_suffix=java; + compiler=$java_compiler + cflags="" + ;; + ruby) + lang_opt=-R; + code_suffix=rb; + compiler=$ruby_engine + cflags="" + ;; + csharp) + lang_opt="-A"; + code_suffix=cs; + compiler=$csharp_compiler + cflags="" + ;; + go) + lang_opt="-Z" + code_suffix=go + compiler=$go_compiler + cflags="" + ;; + indep) + lang_opt=""; + + # If we have no txl engine then skip this test. + [ -z "$txl_engine" ] && continue + for lang in c d java ruby csharp go; do + case $lang in + c) lf="-C";; + d) lf="-D";; + java) lf="-J";; + ruby) lf="-R";; + csharp) lf="-A";; + go) lf="-Z";; + esac + + echo "$langflags" | grep -e $lf >/dev/null || continue + + targ=${root}_$lang.rl + echo "./langtrans_$lang.sh $test_case > $targ" + if ! ./langtrans_$lang.sh $test_case > $targ; then + test_error + fi + echo "./runtests -g $options $targ" + if ! ./runtests -g $options $targ; then + test_error + fi + done + continue; + ;; + *) + echo "$test_case: unknown language type $lang" >&2 + exit 1; + ;; + esac + + # Make sure that we are interested in the host language. 
+ echo "$langflags" | grep -e $lang_opt >/dev/null || continue + + code_src=$root.$code_suffix; + binary=$root.bin; + output=$root.out; + + # If we have no compiler for the source program then skip it. + [ -z "$compiler" ] && continue + + additional_cflags=`sed '/@CFLAGS:/s/^.*: *//p;d' $test_case` + [ -n "$additional_cflags" ] && cflags="$cflags $additional_cflags" + + allow_minflags=`sed '/@ALLOW_MINFLAGS:/s/^.*: *//p;d' $test_case` + [ -z "$allow_minflags" ] && allow_minflags="-n -m -l -e" + + case $lang in + c|c++|d) + # Using genflags, get the allowed gen flags from the test case. If the + # test case doesn't specify assume that all gen flags are allowed. + allow_genflags=`sed '/@ALLOW_GENFLAGS:/s/^.*: *//p;d' $test_case` + [ -z "$allow_genflags" ] && allow_genflags="-T0 -T1 -F0 -F1 -G0 -G1 -G2" + + for min_opt in $minflags; do + echo "$allow_minflags" | grep -e $min_opt >/dev/null || continue + for gen_opt in $genflags; do + echo "$allow_genflags" | grep -e $gen_opt >/dev/null || continue + run_test + done + done + ;; + + java) + # Not interested in gen opt. + gen_opt="" + for min_opt in $minflags; do + echo "$allow_minflags" | grep -e $min_opt >/dev/null || continue + run_test + done + ;; + + ruby) + # Using genflags, get the allowed gen flags from the test case. If the + # test case doesn't specify assume that all gen flags are allowed. + allow_genflags=`sed '/@ALLOW_GENFLAGS:/s/^.*: *//p;d' $test_case` + [ -z "$allow_genflags" ] && allow_genflags="-T0 -T1 -F0 -F1" + + for min_opt in $minflags; do + echo "$allow_minflags" | grep -e $min_opt >/dev/null || continue + + for gen_opt in $genflags; do + echo "$allow_genflags" | grep -e $gen_opt >/dev/null || continue + run_test + done + done + ;; + + csharp) + # Using genflags, get the allowed gen flags from the test case. If the + # test case doesn't specify assume that all gen flags are allowed. 
+ allow_genflags=`sed '/@ALLOW_GENFLAGS:/s/^.*: *//p;d' $test_case` + [ -z "$allow_genflags" ] && allow_genflags="-T0 -T1 -F0 -F1 -G0 -G1" + + for min_opt in $minflags; do + echo "$allow_minflags" | grep -e $min_opt >/dev/null || continue + for gen_opt in $genflags; do + echo "$allow_genflags" | grep -e $gen_opt >/dev/null || continue + run_test + done + done + ;; + + go) + # Using genflags, get the allowed gen flags from the test case. If the + # test case doesn't specify assume that all gen flags are allowed. + allow_genflags=`sed '/@ALLOW_GENFLAGS:/s/^.*: *//p;d' $test_case` + [ -z "$allow_genflags" ] && allow_genflags="-T0 -T1 -F0 -F1 -G0 -G1 -G2" + + for min_opt in $minflags; do + echo "$allow_minflags" | grep -e $min_opt >/dev/null || continue + for gen_opt in $genflags; do + echo "$allow_genflags" | grep -e $gen_opt >/dev/null || continue + run_test + done + done + ;; + esac +done diff --git a/test/scan1.rl b/test/scan1.rl new file mode 100644 index 0000000..df0971a --- /dev/null +++ b/test/scan1.rl @@ -0,0 +1,64 @@ +/* + * @LANG: indep + */ +ptr ts; +ptr te; +int act; +int token; +%% +%%{ + machine scanner; + + # Warning: changing the patterns or the input string will affect the + # coverage of the scanner action types. + main := |* + 'a' => { + prints "on last "; + if ( p+1 == te ) + prints "yes"; + prints "\n"; + }; + + 'b'+ => { + prints "on next "; + if ( p+1 == te ) + prints "yes"; + prints "\n"; + }; + + 'c1' 'dxxx'? 
=> { + prints "on lag "; + if ( p+1 == te ) + prints "yes"; + prints "\n"; + }; + + 'd1' => { + prints "lm switch1 "; + if ( p+1 == te ) + prints "yes"; + prints "\n"; + }; + 'd2' => { + prints "lm switch2 "; + if ( p+1 == te ) + prints "yes"; + prints "\n"; + }; + + [d0-9]+ '.'; + + '\n'; + *|; +}%% +/* _____INPUT_____ +"abbc1d1d2\n" +_____INPUT_____ */ +/* _____OUTPUT_____ +on last yes +on next yes +on lag yes +lm switch1 yes +lm switch2 yes +ACCEPT +_____OUTPUT_____ */ diff --git a/test/scan2.rl b/test/scan2.rl new file mode 100644 index 0000000..a1ae959 --- /dev/null +++ b/test/scan2.rl @@ -0,0 +1,34 @@ +/* + * @LANG: indep + */ +ptr ts; +ptr te; +int act; +int token; +%% +%%{ + machine scanner; + + # Warning: changing the patterns or the input string will affect the + # coverage of the scanner action types. + main := |* + 'a' => { + prints "pat1\n"; + }; + + [ab]+ . 'c' => { + prints "pat2\n"; + }; + + any => { + prints "any\n"; + }; + *|; +}%% +/* _____INPUT_____ +"a" +_____INPUT_____ */ +/* _____OUTPUT_____ +pat1 +ACCEPT +_____OUTPUT_____ */ diff --git a/test/scan3.rl b/test/scan3.rl new file mode 100644 index 0000000..ca1a136 --- /dev/null +++ b/test/scan3.rl @@ -0,0 +1,32 @@ +/* + * @LANG: indep + */ +ptr ts; +ptr te; +int act; +int token; +%% +%%{ + machine scanner; + + # Warning: changing the patterns or the input string will affect the + # coverage of the scanner action types. + main := |* + 'a' => { + prints "pat1\n"; + }; + 'b' => { + prints "pat2\n"; + }; + [ab] any* => { + prints "pat3\n"; + }; + *|; +}%% +/* _____INPUT_____ +"ab89" +_____INPUT_____ */ +/* _____OUTPUT_____ +pat3 +ACCEPT +_____OUTPUT_____ */ diff --git a/test/scan4.rl b/test/scan4.rl new file mode 100644 index 0000000..12d4d4c --- /dev/null +++ b/test/scan4.rl @@ -0,0 +1,33 @@ +/* + * @LANG: indep + */ +ptr ts; +ptr te; +int act; +int token; +%% +%%{ + machine scanner; + + # Warning: changing the patterns or the input string will affect the + # coverage of the scanner action types. 
+ main := |* + 'a' => { + prints "pat1\n"; + }; + + [ab]+ . 'c' => { + prints "pat2\n"; + }; + + any; + *|; +}%% +/* _____INPUT_____ +"ba a" +_____INPUT_____ */ +/* _____OUTPUT_____ +pat1 +pat1 +ACCEPT +_____OUTPUT_____ */ diff --git a/test/stateact1.rl b/test/stateact1.rl new file mode 100644 index 0000000..ef50c75 --- /dev/null +++ b/test/stateact1.rl @@ -0,0 +1,48 @@ +/* + * @LANG: indep + * + * Test in and out state actions. + */ +%% +%%{ + machine state_act; + + action a1 { prints "a1\n"; } + action a2 { prints "a2\n"; } + action b1 { prints "b1\n"; } + action b2 { prints "b2\n"; } + action c1 { prints "c1\n"; } + action c2 { prints "c2\n"; } + action next_again {fnext again;} + + hi = 'hi'; + line = again: + hi + >to b1 + >from b2 + '\n' + >to c1 + >from c2 + @next_again; + + main := line* + >to a1 + >from a2; +}%% + +/* _____INPUT_____ +"hi\nhi\n" +_____INPUT_____ */ + +/* _____OUTPUT_____ +a2 +b2 +c1 +c2 +b1 +b2 +c1 +c2 +b1 +FAIL +_____OUTPUT_____ */ diff --git a/test/statechart1.rl b/test/statechart1.rl new file mode 100644 index 0000000..884f3f0 --- /dev/null +++ b/test/statechart1.rl @@ -0,0 +1,100 @@ +/* + * @LANG: c + */ + +/* + * Test in and out state actions. 
+ */ + +#include <stdio.h> +#include <string.h> + +struct state_chart +{ + int cs; +}; + +%%{ + machine state_chart; + variable cs fsm->cs; + + action a { printf("a"); } + action b { printf("b"); } + action hexa { printf("a"); } + action hexb { printf("b"); } + + hex_a = '0x' '0'* '61' @hexa; + hex_b = '0x' '0'* '62' @hexb; + + a = 'a' @a | hex_a; + b = 'b' @b | hex_b; + ws = ' '+; + + mach = + start: ( + a -> st1 | + b -> st2 | + zlen -> final + ), + st1: ( + a -> st1 | + ws -> start | + zlen -> final + ), + st2: ( + b -> st2 | + ws -> start | + zlen -> final + ); + + main := ( mach '\n' )*; +}%% + +%% write data; + +void state_chart_init( struct state_chart *fsm ) +{ + %% write init; +} + +void state_chart_execute( struct state_chart *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int state_chart_finish( struct state_chart *fsm ) +{ + if ( fsm->cs == state_chart_error ) + return -1; + if ( fsm->cs >= state_chart_first_final ) + return 1; + return 0; +} + +struct state_chart sc; + +void test( char *buf ) +{ + int len = strlen( buf ); + state_chart_init( &sc ); + state_chart_execute( &sc, buf, len ); + state_chart_finish( &sc ); + printf("\n"); +} + +int main() +{ + test( + "aa0x0061aa b\n" + "bbb0x62b 0x61 0x000062\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +aaaaabbbbbbab +#endif diff --git a/test/strings1.rl b/test/strings1.rl new file mode 100644 index 0000000..0d5eea8 --- /dev/null +++ b/test/strings1.rl @@ -0,0 +1,193 @@ +/* + * @LANG: c + */ + +#include <string.h> +#include <stdio.h> + +struct strs +{ + int cs; +}; + +%%{ + machine strs; + variable cs fsm->cs; + + main := + "__gmon_start__\n" | + "cerr\n" | + "__cp_push_exception\n" | + "_DYNAMIC\n" | + "__rtti_user\n" | + "__rtti_si\n" | + "_init\n" | + "__throw\n" | + "__deregister_frame_info\n" | + "terminate__Fv\n" | + "__builtin_vec_new\n" | + "_fini\n" | + "__builtin_vec_delete\n" | + "_GLOBAL_OFFSET_TABLE_\n" | + 
"__nw__FUiPv\n" | + "__builtin_delete\n" | + "__builtin_new\n" | + "cout\n" | + "__register_frame_info\n" | + "__eh_alloc\n" | + "strcpy\n" | + "stdout\n" | + "memmove\n" | + "memcpy\n" | + "malloc\n" | + "isatty\n" | + "strtoul\n" | + "fprintf\n" | + "stdin\n" | + "ferror\n" | + "strncpy\n" | + "unlink\n" | + "strcasecmp\n" | + "realloc\n" | + "_IO_getc\n" | + "fread\n" | + "memset\n" | + "__assert_fail\n" | + "strcmp\n" | + "stderr\n" | + "fwrite\n" | + "exit\n" | + "fopen\n" | + "atoi\n" | + "fileno\n" | + "_IO_stdin_used\n" | + "__libc_start_main\n" | + "strlen\n" | + "free\n" | + "_edata\n" | + "__bss_start\n" | + "_end\n" | + "QVhl\n" | + "BPPh\n" | + "PHRV\n" | + "PHRj\n" | + "PHRj\n" | + "jphy\n" | + "jqhy\n" | + "PHRj\n" | + "PHRj\n" | + "LWVS\n" | + "LWVS\n" | + "bad_alloc\n" | + "main\n" | + "false\n" | + "help\n" | + "bad_alloc\n" | + "bad_alloc\n" | + "bad_alloc\n" | + "ascii\n" | + "extend\n" | + "alnum\n" | + "alpha\n" | + "cntrl\n" | + "digit\n" | + "graph\n" | + "lower\n" | + "print\n" | + "punct\n" | + "space\n" | + "upper\n" | + "xdigit\n" | + "false\n" | + "bad_alloc\n" | + "bad_alloc\n" | + "bad_alloc\n" | + "TransStruct\n" | + "StateStruct\n" | + "Struct\n" | + "Init\n" | + "bad_alloc\n" | + "TransStruct\n" | + "StateStruct\n" | + "Struct\n" | + "Init\n" | + "Accept\n" | + "Finish\n" | + "bad_alloc\n" | + "Struct\n" | + "Init\n" | + "Finish\n" | + "Accept\n" | + "bad_alloc\n" | + "Struct\n" | + "Init\n" | + "bad_alloc\n" | + "Struct\n" | + "Init\n" | + "Finish\n" | + "Accept\n" | + "bad_alloc\n" | + "Struct\n" | + "Init\n" | + "Finish\n" | + "Accept"; +}%% + +%% write data; + +void strs_init( struct strs *fsm ) +{ + %% write init; +} + +void strs_execute( struct strs *fsm, const char *_data, int _len ) +{ + const char *p = _data; + const char *pe = _data+_len; + + %% write exec; +} + +int strs_finish( struct strs *fsm ) +{ + if ( fsm->cs == strs_error ) + return -1; + if ( fsm->cs >= strs_first_final ) + return 1; + return 0; +} + +struct strs 
fsm; +void test( char *buf ) +{ + int len = strlen( buf ); + strs_init( &fsm ); + strs_execute( &fsm, buf, len ); + if ( strs_finish( &fsm ) > 0 ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( "stdin\n" ); + test( "bad_alloc\n" ); + test( "_GLOBAL_OFFSET_TABLE_\n" ); + test( "not in\n" ); + test( + "isatty\n" + "junk on end.\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +ACCEPT +FAIL +FAIL +#endif diff --git a/test/strings2.h b/test/strings2.h new file mode 100644 index 0000000..1cf0ce9 --- /dev/null +++ b/test/strings2.h @@ -0,0 +1,9 @@ +#ifndef _STRINGS1_H +#define _STRINGS1_H + +struct strs +{ + int cs; +}; + +#endif diff --git a/test/strings2.rl b/test/strings2.rl new file mode 100644 index 0000000..edad63b --- /dev/null +++ b/test/strings2.rl @@ -0,0 +1,1349 @@ +/* + * @LANG: c + * @ALLOW_GENFLAGS: -T0 -T1 -F0 -F1 + * @ALLOW_MINFLAGS: -n -m -l + * + * Test works with split code gen. + */ + +#include <string.h> +#include <stdio.h> + +#include "strings2.h" + +%%{ + machine strs; + variable cs fsm->cs; + + main := + "/lib/ld-linux.so.2\n" | + "libstdc++-libc6.2-2.so.3\n" | + "cerr\n" | + "__cp_push_exception\n" | + "_DYNAMIC\n" | + "endl__FR7ostream\n" | + "__ls__7ostreamc\n" | + "_._9exception\n" | + "__vt_9bad_alloc\n" | + "__rtti_user\n" | + "__ls__7ostreamPFR7ostream_R7ostream\n" | + "__rtti_si\n" | + "_init\n" | + "bad__C3ios\n" | + "__throw\n" | + "__ls__7ostreamPCc\n" | + "__deregister_frame_info\n" | + "terminate__Fv\n" | + "__ls__7ostreamb\n" | + "__ls__7ostreami\n" | + "__8ofstreamiPCcii\n" | + "__builtin_vec_new\n" | + "_fini\n" | + "__9exception\n" | + "__builtin_vec_delete\n" | + "_GLOBAL_OFFSET_TABLE_\n" | + "__vt_9exception\n" | + "__nw__FUiPv\n" | + "_._9bad_alloc\n" | + "__builtin_delete\n" | + "__builtin_new\n" | + "cout\n" | + "__register_frame_info\n" | + "__eh_alloc\n" | + "__gmon_start__\n" | + "libm.so.6\n" | + "libc.so.6\n" | + "strcpy\n" | + "stdout\n" | + "memmove\n" | + "memcpy\n" | 
+ "malloc\n" | + "strtoul\n" | + "fprintf\n" | + "stdin\n" | + "ferror\n" | + "strncpy\n" | + "strcasecmp\n" | + "realloc\n" | + "_IO_getc\n" | + "fread\n" | + "memset\n" | + "clearerr\n" | + "__assert_fail\n" | + "strcmp\n" | + "stderr\n" | + "fwrite\n" | + "__errno_location\n" | + "exit\n" | + "fopen\n" | + "atoi\n" | + "_IO_stdin_used\n" | + "__libc_start_main\n" | + "strlen\n" | + "free\n" | + "_edata\n" | + "__bss_start\n" | + "_end\n" | + "GLIBC_2.1\n" | + "GLIBC_2.0\n" | + "PTRh\n" | + "QVhL\n" | + "<WVS\n" | + "LWVS\n" | + "PHRW\n" | + "<WVS\n" | + "\WVS\n" | + ",WVS\n" | + "@Phl\n" | + "<WVS\n" | + "jZjA\n" | + "jzja\n" | + "j9j0\n" | + "j9j0\n" | + "jZjA\n" | + "jzja\n" | + "jzja\n" | + "jZjA\n" | + "j~j!\n" | + "j~j \n" | + "j/j!\n" | + "j@j:\n" | + "j`j[\n" | + "j~j{\n" | + "j9j0\n" | + "jFjA\n" | + "jfja\n" | + ",WVS\n" | + ",WVS\n" | + ";C<|\n" | + "<WVS\n" | + "C ;C\n" | + "C$;C\n" | + "C$;C\n" | + "C ;C\n" | + ",WVS\n" | + ";E uF\n" | + "P ;U\n" | + "P ;U\n" | + "E$fP\n" | + "E$fP\n" | + "E$fP\n" | + "E$fP\n" | + "E$fP\n" | + "E$fP\n" | + "E$fP\n" | + "E$fP\n" | + "u!h@\n" | + "PHRj\n" | + "PHRj\n" | + "P\ U\n" | + "j]hY\n" | + "johY\n" | + "PHRj\n" | + "PHRj\n" | + "E fPj\n" | + "E fP\n" | + "E fP\n" | + "E fP\n" | + "E fP\n" | + "E fP\n" | + "E fPj\n" | + "t$h`\n" | + "F ;C } \n" | + "F ;C ~ \n" | + "@X:BXt)\n" | + "\WVS\n" | + "\WVS\n" | + "PPRS\n" | + "F ;C } \n" | + "F ;C ~ \n" | + "@X:BXt)\n" | + ";H(}:\n" | + "@ fP\n" | + ";P |\n" | + "<WVS\n" | + ";P |\n" | + "bad_alloc\n" | + "usage: ragel [options] file\n" | + "general:\n" | + " -h, -H, -? 
Disply this usage.\n" | + " -o <file> Write output to <file>.\n" | + " -s Print stats on the compiled fsm.\n" | + " -f Dump the final fsm.\n" | + "fsm minimization:\n" | + " -n No minimization (default).\n" | + " -m Find the minimal fsm accepting the language.\n" | + "generated code language:\n" | + " -c Generate c code (default).\n" | + " -C Generate c++ code.\n" | + "generated code style:\n" | + " -T0 Generate a table driven fsm (default).\n" | + " -T1 Generate a faster table driven fsm.\n" | + " -S0 Generate a switch driven fsm.\n" | + " -G0 Generate a goto driven fsm.\n" | + " -G1 Generate a faster goto driven fsm.\n" | + " -G2 Generate a really fast goto driven fsm.\n" | + "char * FileNameFromStem(char *, char *)\n" | + "main.cpp\n" | + "len > 0\n" | + "main\n" | + "ragel: main graph not defined\n" | + "graph states: \n" | + "graph transitions: \n" | + "machine states: \n" | + "machine functions: \n" | + "function array: \n" | + "T:S:G:Cco:senmabjkfhH?-:\n" | + "ragel: zero length output file name given\n" | + "ragel: output file already given\n" | + "ragel: invalid param specified (try -h for a list of options)\n" | + "help\n" | + "ragel: zero length input file name given\n" | + "ragel: input file already given\n" | + "ragel: warning: -e given but minimization is not enabled\n" | + "ragel: no input file (try -h for a list of options)\n" | + " for reading\n" | + "ragel: could not open \n" | + " for writing\n" | + "ragel: error opening \n" | + " * Parts of this file are copied from Ragel source covered by the GNU\n" | + " * GPL. As a special exception, you may use the parts of this file copied\n" | + " * from Ragel source without restriction. 
The remainder is derived from\n" | + "bad_alloc\n" | + "%s:%i: unterminated literal\n" | + "%s:%i: unterminated comment\n" | + "%s:%i: bad character in literal\n" | + "fatal flex scanner internal error--no action found\n" | + "fatal flex scanner internal error--end of buffer missed\n" | + "fatal error - scanner input buffer overflow\n" | + "input in flex scanner failed\n" | + "out of dynamic memory in yy_create_buffer()\n" | + "out of dynamic memory in yy_scan_buffer()\n" | + "out of dynamic memory in yy_scan_bytes()\n" | + "bad buffer in yy_scan_bytes()\n" | + "bad_alloc\n" | + "%s:%i: warning: range gives null fsm\n" | + "%s:%i: warning: literal used in range is not of length 1, using 0x%x\n" | + "%s:%i: warning: overflow in byte constant\n" | + "parse error\n" | + "parser stack overflow\n" | + "%s:%i: %s\n" | + "bad_alloc\n" | + "extend\n" | + "ascii\n" | + "alpha\n" | + "digit\n" | + "alnum\n" | + "lower\n" | + "upper\n" | + "cntrl\n" | + "graph\n" | + "print\n" | + "punct\n" | + "space\n" | + "xdigit\n" | + "struct Fsm * FactorWithAugNode::Walk()\n" | + "parsetree.cpp\n" | + "false\n" | + "bad_alloc\n" | + "xx []()\n" | + " df \n" | + "StartState: \n" | + "Final States:\n" | + "void FsmGraph<State,int,Trans>::AttachStates(State *, State *, Trans *, FsmKeyType, int)\n" | + "rlfsm/fsmattach.cpp\n" | + "trans->toState == __null\n" | + "trans->fromState == __null\n" | + "void FsmGraph<State,int,Trans>::DetachStates(State *, State *, Trans *, FsmKeyType, int)\n" | + "trans->toState == to\n" | + "trans->fromState == from\n" | + "inTel != __null\n" | + "void Vector<BstMapEl<int,int>,ResizeExpn>::setAs(const Vector<BstMapEl<int,int>,ResizeExpn> &)\n" | + "aapl/vectcommon.h\n" | + "&v != this\n" | + "void FsmGraph<State,int,Trans>::ChangeRangeLowerKey(Trans *, int, int)\n" | + "inRangeEl != __null\n" | + "void FsmGraph<State,int,Trans>::IsolateStartState()\n" | + "rlfsm/fsmgraph.cpp\n" | + "md.stateDict.nodeCount == 0\n" | + "md.stfil.listLength == 0\n" | + "struct 
State * FsmGraph<State,int,Trans>::DetachState(State *)\n" | + "fromTel != __null\n" | + "struct Trans * FsmGraph<State,int,Trans>::AttachStates(State *, State *, FsmKeyType, int, int)\n" | + "outTel != __null\n" | + "outTel1 != __null\n" | + "from->defOutTrans == __null\n" | + "void FsmGraph<State,int,Trans>::VerifyOutFuncs()\n" | + "state->outTransFuncTable.tableLength == 0\n" | + "!state->isOutPriorSet\n" | + "state->outPriority == 0\n" | + "void FsmGraph<State,int,Trans>::VerifyIntegrity()\n" | + "rlfsm/fsmbase.cpp\n" | + "outIt.trans->fromState == state\n" | + "inIt.trans->toState == state\n" | + "static int FsmTrans<State,Trans,int,CmpOrd<int> >::ComparePartPtr(FsmTrans<State,Trans,int,CmpOrd<int> > *, FsmTrans<State,Trans,int,CmpOrd<int> > *)\n" | + "rlfsm/fsmstate.cpp\n" | + "false\n" | + "void FsmGraph<State,int,Trans>::InTransMove(State *, State *)\n" | + "dest != src\n" | + "static bool FsmTrans<State,Trans,int,CmpOrd<int> >::ShouldMarkPtr(MarkIndex<State> &, FsmTrans<State,Trans,int,CmpOrd<int> > *, FsmTrans<State,Trans,int,CmpOrd<int> > *)\n" | + "bad_alloc\n" | + "10FsmCodeGen\n" | + "bad_alloc\n" | + " case \n" | + "break;}\n" | + "unsigned char\n" | + "unsigned short\n" | + "unsigned int\n" | + "{0, \n" | + "/* Forward dec state for the transition structure. */\n" | + "struct \n" | + "StateStruct;\n" | + "/* A single transition. */\n" | + "struct \n" | + "TransStruct\n" | + " struct \n" | + "StateStruct *toState;\n" | + " int *funcs;\n" | + "typedef struct \n" | + "TransStruct \n" | + "Trans;\n" | + "/* A single state. */\n" | + "struct \n" | + "StateStruct\n" | + " int lowIndex;\n" | + " int highIndex;\n" | + " void *transIndex;\n" | + " unsigned int dflIndex;\n" | + " int *outFuncs;\n" | + " int isFinState;\n" | + "typedef struct \n" | + "StateStruct \n" | + "State;\n" | + "/* Only non-static data: current state. 
*/\n" | + "struct \n" | + "Struct\n" | + "State *curState;\n" | + " int accept;\n" | + "typedef struct \n" | + "Struct \n" | + "/* Init the fsm. */\n" | + "void \n" | + "Init( \n" | + " *fsm );\n" | + "/* Execute some chunk of data. */\n" | + "void \n" | + "Execute( \n" | + " *fsm, char *data, int dlen );\n" | + "/* Indicate to the fsm tha there is no more data. */\n" | + "void \n" | + "Finish( \n" | + " *fsm );\n" | + "/* Did the machine accept? */\n" | + "int \n" | + "Accept( \n" | + " *fsm );\n" | + "#define f \n" | + "#define s \n" | + "#define i \n" | + "#define t \n" | + "/* The array of functions. */\n" | + "#if \n" | + "static int \n" | + "_f[] = {\n" | + "#endif\n" | + "/* The array of indicies into the transition array. */\n" | + "#if \n" | + "static \n" | + "_i[] = {\n" | + "#endif\n" | + "/* The aray of states. */\n" | + "static \n" | + "State \n" | + "_s[] = {\n" | + "/* The array of transitions. */\n" | + "static \n" | + "Trans \n" | + "_t[] = {\n" | + "/* The start state. */\n" | + "static \n" | + "State *\n" | + "_startState = s+\n" | + "#undef f\n" | + "#undef s\n" | + "#undef i\n" | + "#undef t\n" | + "* Execute functions pointed to by funcs until the null function is found. \n" | + "inline static void \n" | + "ExecFuncs( \n" | + " *fsm, int *funcs, char *p )\n" | + " int len = *funcs++;\n" | + " while ( len-- > 0 ) {\n" | + " switch ( *funcs++ ) {\n" | + " * Init the fsm to a runnable state.\n" | + "void \n" | + " *fsm )\n" | + " fsm->curState = \n" | + "_startState;\n" | + " fsm->accept = 0;\n" | + " * Did the fsm accept? \n" | + "int \n" | + " *fsm )\n" | + " return fsm->accept;\n" | + " * Execute the fsm on some chunk of data. 
\n" | + "void \n" | + " *fsm, char *data, int dlen )\n" | + " char *p = data;\n" | + " int len = dlen;\n" | + "State *cs = fsm->curState;\n" | + " for ( ; len > 0; p++, len-- ) {\n" | + " int c = (unsigned char) *p;\n" | + "Trans *trans;\n" | + " if ( cs == 0 )\n" | + " goto finished;\n" | + " /* If the character is within the index bounds then get the\n" | + " * transition for it. If it is out of the transition bounds\n" | + " * we will use the default transition. */\n" | + " if ( cs->lowIndex <= c && c < cs->highIndex ) {\n" | + " /* Use the index to look into the transition array. */\n" | + " trans = \n" | + "_t + \n" | + " ((\n" | + "*)cs->transIndex)[c - cs->lowIndex];\n" | + " else {\n" | + " /* Use the default index as the char is out of range. */\n" | + " trans = \n" | + "_t + cs->dflIndex;\n" | + " /* If there are functions for this transition then execute them. */\n" | + " if ( trans->funcs != 0 )\n" | + "ExecFuncs( fsm, trans->funcs, p );\n" | + " /* Move to the new state. */\n" | + " cs = trans->toState;\n" | + "finished:\n" | + " fsm->curState = cs;\n" | + " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" | + "void \n" | + " *fsm )\n" | + "State *cs = fsm->curState;\n" | + " if ( cs != 0 && cs->isFinState ) {\n" | + " /* If finishing in a final state then execute the\n" | + " * out functions for it. (if any). */\n" | + " if ( cs->outFuncs != 0 )\n" | + "ExecFuncs( fsm, cs->outFuncs, 0 );\n" | + " fsm->accept = 1;\n" | + " else {\n" | + " /* If we are not in a final state then this\n" | + " * is an error. Move to the error state. */\n" | + " fsm->curState = 0;\n" | + "class \n" | + "public:\n" | + " /* Forward dec state for the transition structure. */\n" | + " struct State;\n" | + " /* A single transition. */\n" | + " struct Trans\n" | + " State *toState;\n" | + " int *funcs;\n" | + " /* A single state. 
*/\n" | + " struct State\n" | + " int lowIndex;\n" | + " int highIndex;\n" | + " void *transIndex;\n" | + " unsigned int dflIndex;\n" | + " int *outFuncs;\n" | + " int isFinState;\n" | + " /* Constructor. */\n" | + " void Init( );\n" | + " /* Execute some chunk of data. */\n" | + " void Execute( char *data, int dlen );\n" | + " /* Indicate to the fsm tha there is no more data. */\n" | + " void Finish( );\n" | + " /* Did the machine accept? */\n" | + " int Accept( );\n" | + " State *curState;\n" | + " int accept;\n" | + " inline void ExecFuncs( int *funcs, char *p );\n" | + "/* The array of functions. */\n" | + "#if \n" | + "::State \n" | + "/* The array of trainsitions. */\n" | + "static \n" | + "::Trans \n" | + "/* The start state. */\n" | + "static \n" | + "::State *\n" | + " * Execute functions pointed to by funcs until the null function is found. \n" | + "inline void \n" | + "::ExecFuncs( int *funcs, char *p )\n" | + " int len = *funcs++;\n" | + " while ( len-- > 0 ) {\n" | + " switch ( *funcs++ ) {\n" | + " * Constructor\n" | + " Init();\n" | + "Init\n" | + "void \n" | + "::Init( )\n" | + " curState = \n" | + "_startState;\n" | + " accept = 0;\n" | + "::Accept( )\n" | + " return accept;\n" | + "::Execute( char *data, int dlen )\n" | + " char *p = data;\n" | + " int len = dlen;\n" | + " State *cs = curState;\n" | + " for ( ; len > 0; p++, len-- ) {\n" | + " int c = (unsigned char)*p;\n" | + " Trans *trans;\n" | + " if ( cs == 0 )\n" | + " goto finished;\n" | + " /* If the character is within the index bounds then get the\n" | + " * transition for it. If it is out of the transition bounds\n" | + " * we will use the default transition. */\n" | + " if ( cs->lowIndex <= c && c < cs->highIndex ) {\n" | + " /* Use the index to look into the transition array. */\n" | + " trans = \n" | + "_t + cs->dflIndex;\n" | + " /* If there are functions for this transition then execute them. 
*/\n" | + " if ( trans->funcs != 0 )\n" | + " ExecFuncs( trans->funcs, p );\n" | + " /* Move to the new state. */\n" | + " cs = trans->toState;\n" | + "finished:\n" | + " curState = cs;\n" | + "::Finish( )\n" | + " State *cs = curState;\n" | + " if ( cs != 0 && cs->isFinState ) {\n" | + " /* If finishing in a final state then execute the\n" | + " * out functions for it. (if any). */\n" | + " if ( cs->outFuncs != 0 )\n" | + " ExecFuncs( cs->outFuncs, 0 );\n" | + " accept = 1;\n" | + " else {\n" | + " /* If we are not in a final state then this\n" | + " * is an error. Move to the error state. */\n" | + " curState = 0;\n" | + "10TabCodeGen\n" | + "11CTabCodeGen\n" | + "12CCTabCodeGen\n" | + "10FsmCodeGen\n" | + "bad_alloc\n" | + " case \n" | + " break;\n" | + "/* Forward dec state for the transition structure. */\n" | + "struct \n" | + "StateStruct;\n" | + "/* A single transition. */\n" | + "struct \n" | + "TransStruct\n" | + " struct \n" | + "StateStruct *toState;\n" | + " int funcs;\n" | + "typedef struct \n" | + "TransStruct \n" | + "Trans;\n" | + "/* A single state. */\n" | + "struct \n" | + "StateStruct\n" | + " int lowIndex;\n" | + " int highIndex;\n" | + " void *transIndex;\n" | + " int dflIndex;\n" | + " int outFuncs;\n" | + " int isFinState;\n" | + "typedef struct \n" | + "StateStruct \n" | + "State;\n" | + "/* Only non-static data: current state. */\n" | + "struct \n" | + "Struct\n" | + "State *curState;\n" | + " int accept;\n" | + "typedef struct \n" | + "Struct \n" | + "/* Init the fsm. */\n" | + "void \n" | + "Init( \n" | + " *fsm );\n" | + "/* Execute some chunk of data. */\n" | + "void \n" | + "Execute( \n" | + " *fsm, char *data, int dlen );\n" | + "/* Indicate to the fsm tha there is no more data. */\n" | + "void \n" | + "Finish( \n" | + " *fsm );\n" | + "/* Did the machine accept? */\n" | + "int \n" | + "Accept( \n" | + " *fsm );\n" | + "#define s \n" | + "#define i \n" | + "#define t \n" | + "/* The array of indicies into the transition array. 
*/\n" | + "#if \n" | + "static \n" | + "_i[] = {\n" | + "#endif\n" | + "/* The aray of states. */\n" | + "static \n" | + "State \n" | + "_s[] = {\n" | + "/* The array of trainsitions. */\n" | + "static \n" | + "Trans \n" | + "_t[] = {\n" | + "/* The start state. */\n" | + "static \n" | + "State *\n" | + "_startState = s+\n" | + "#undef f\n" | + "#undef s\n" | + "#undef i\n" | + "#undef t\n" | + "/***************************************************************************\n" | + " * Execute functions pointed to by funcs until the null function is found. \n" | + "inline static void \n" | + "ExecFuncs( \n" | + " *fsm, int funcs, char *p )\n" | + " switch ( funcs ) {\n" | + "/****************************************\n" | + "Init\n" | + "void \n" | + " *fsm )\n" | + " fsm->curState = \n" | + "_startState;\n" | + " fsm->accept = 0;\n" | + "/****************************************\n" | + "Accept\n" | + " * Did the fsm accept? \n" | + "int \n" | + " *fsm )\n" | + " return fsm->accept;\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + " *fsm, char *data, int dlen )\n" | + " char *p = data;\n" | + " int len = dlen;\n" | + "State *cs = fsm->curState;\n" | + " for ( ; len > 0; p++, len-- ) {\n" | + " int c = (unsigned char)*p;\n" | + "Trans *trans;\n" | + " if ( cs == 0 )\n" | + " goto finished;\n" | + " /* If the character is within the index bounds then get the\n" | + " * transition for it. If it is out of the transition bounds\n" | + " * we will use the default transition. */\n" | + " if ( cs->lowIndex <= c && c < cs->highIndex ) {\n" | + " /* Use the index to look into the transition array. */\n" | + " trans = \n" | + "_t + \n" | + " ((\n" | + "*)cs->transIndex)[c - cs->lowIndex];\n" | + " else {\n" | + " /* Use the default index as the char is out of range. 
*/\n" | + " trans = \n" | + "_t + cs->dflIndex;\n" | + " /* If there are functions for this transition then execute them. */\n" | + " if ( trans->funcs >= 0 )\n" | + "ExecFuncs( fsm, trans->funcs, p );\n" | + " /* Move to the new state. */\n" | + " cs = trans->toState;\n" | + "finished:\n" | + " fsm->curState = cs;\n" | + "/**********************************************************************\n" | + "Finish\n" | + " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" | + "void \n" | + " *fsm )\n" | + "State *cs = fsm->curState;\n" | + " if ( cs != 0 && cs->isFinState ) {\n" | + " /* If finishing in a final state then execute the\n" | + " * out functions for it. (if any). */\n" | + " if ( cs->outFuncs != 0 )\n" | + "ExecFuncs( fsm, cs->outFuncs, 0 );\n" | + " fsm->accept = 1;\n" | + " else {\n" | + " /* If we are not in a final state then this\n" | + " * is an error. Move to the error state. */\n" | + " fsm->curState = 0;\n" | + "class \n" | + "public:\n" | + " /* Function and index type. */\n" | + " typedef int Func;\n" | + " /* Forward dec state for the transition structure. */\n" | + " struct State;\n" | + " /* A single transition. */\n" | + " struct Trans\n" | + " State *toState;\n" | + " int funcs;\n" | + " /* A single state. */\n" | + " struct State\n" | + " int lowIndex;\n" | + " int highIndex;\n" | + " void *transIndex;\n" | + " int dflIndex;\n" | + " int outFuncs;\n" | + " int isFinState;\n" | + " /* Constructor. */\n" | + " void Init( );\n" | + " /* Execute some chunk of data. */\n" | + " void Execute( char *data, int dlen );\n" | + " /* Indicate to the fsm tha there is no more data. */\n" | + " void Finish( );\n" | + " /* Did the machine accept? 
*/\n" | + " int Accept( );\n" | + " State *curState;\n" | + " int accept;\n" | + " inline void ExecFuncs( int funcs, char *p );\n" | + "::State \n" | + "::Trans \n" | + "::State *\n" | + "/***************************************************************************\n" | + " * Execute functions pointed to by funcs until the null function is found. \n" | + "inline void \n" | + "::ExecFuncs( int funcs, char *p )\n" | + " switch ( funcs ) {\n" | + "/****************************************\n" | + " * Constructor\n" | + " Init();\n" | + "/****************************************\n" | + "::Init( )\n" | + " curState = \n" | + "_startState;\n" | + " accept = 0;\n" | + "/****************************************\n" | + " * Did the fsm accept? \n" | + "int \n" | + "::Accept( )\n" | + " return accept;\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + "::Execute( char *data, int dlen )\n" | + " char *p = data;\n" | + " int len = dlen;\n" | + " State *cs = curState;\n" | + " for ( ; len > 0; p++, len-- ) {\n" | + " int c = (unsigned char)*p;\n" | + " Trans *trans;\n" | + " if ( cs == 0 )\n" | + " goto finished;\n" | + " /* If the character is within the index bounds then get the\n" | + " * transition for it. If it is out of the transition bounds\n" | + " * we will use the default transition. */\n" | + " if ( cs->lowIndex <= c && c < cs->highIndex ) {\n" | + " /* Use the index to look into the transition array. */\n" | + " trans = \n" | + "_t + cs->dflIndex;\n" | + " /* If there are functions for this transition then execute them. */\n" | + " if ( trans->funcs != 0 )\n" | + " ExecFuncs( trans->funcs, p );\n" | + " /* Move to the new state. */\n" | + " cs = trans->toState;\n" | + "finished:\n" | + " curState = cs;\n" | + "/**********************************************************************\n" | + " * Indicate to the fsm that the input is done. 
Does cleanup tasks.\n" | + "void \n" | + "::Finish( )\n" | + " State *cs = curState;\n" | + " if ( cs != 0 && cs->isFinState ) {\n" | + " /* If finishing in a final state then execute the\n" | + " * out functions for it. (if any). */\n" | + " if ( cs->outFuncs != 0 )\n" | + " ExecFuncs( cs->outFuncs, 0 );\n" | + " accept = 1;\n" | + " else {\n" | + " /* If we are not in a final state then this\n" | + " * is an error. Move to the error state. */\n" | + " curState = 0;\n" | + "11FTabCodeGen\n" | + "12CFTabCodeGen\n" | + "13CCFTabCodeGen\n" | + "bad_alloc\n" | + "cs = -1; \n" | + "cs = \n" | + "break;\n" | + " switch( cs ) {\n" | + " case \n" | + " switch ( c ) {\n" | + "case \n" | + "default: \n" | + " }\n" | + " break;\n" | + " switch( cs ) {\n" | + "accept = 1; \n" | + "/* Only non-static data: current state. */\n" | + "struct \n" | + "Struct\n" | + " int curState;\n" | + " int accept;\n" | + "typedef struct \n" | + "Struct \n" | + "/* Init the fsm. */\n" | + "void \n" | + "Init( \n" | + " *fsm );\n" | + "/* Execute some chunk of data. */\n" | + "void \n" | + "Execute( \n" | + " *fsm, char *data, int dlen );\n" | + "/* Indicate to the fsm tha there is no more data. */\n" | + "void \n" | + "Finish( \n" | + " *fsm );\n" | + "/* Did the machine accept? */\n" | + "int \n" | + "Accept( \n" | + " *fsm );\n" | + "/* The start state. */\n" | + "static int \n" | + "_startState = \n" | + "/****************************************\n" | + "Init\n" | + "void \n" | + " *fsm )\n" | + " fsm->curState = \n" | + "_startState;\n" | + " fsm->accept = 0;\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. 
\n" | + "void \n" | + " *fsm, char *data, int dlen )\n" | + " char *p = data;\n" | + " int len = dlen;\n" | + " int cs = fsm->curState;\n" | + " for ( ; len > 0; p++, len-- ) {\n" | + " unsigned char c = (unsigned char)*p;\n" | + " fsm->curState = cs;\n" | + "/**********************************************************************\n" | + "Finish\n" | + " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" | + "void \n" | + " *fsm )\n" | + " int cs = fsm->curState;\n" | + " int accept = 0;\n" | + " fsm->accept = accept;\n" | + "/*******************************************************\n" | + "Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + " *fsm )\n" | + " return fsm->accept;\n" | + "/* Only non-static data: current state. */\n" | + "class \n" | + "public:\n" | + " /* Init the fsm. */\n" | + " void Init( );\n" | + " /* Execute some chunk of data. */\n" | + " void Execute( char *data, int dlen );\n" | + " /* Indicate to the fsm tha there is no more data. */\n" | + " void Finish( );\n" | + " /* Did the machine accept? */\n" | + " int Accept( );\n" | + " int curState;\n" | + " int accept;\n" | + " /* The start state. */\n" | + " static int startState;\n" | + "/* The start state. */\n" | + "int \n" | + "::startState = \n" | + " Init();\n" | + "/****************************************\n" | + "::Init\n" | + "void \n" | + "::Init( )\n" | + " curState = startState;\n" | + " accept = 0;\n" | + "::Execute( char *data, int dlen )\n" | + " char *p = data;\n" | + " int len = dlen;\n" | + " int cs = curState;\n" | + " for ( ; len > 0; p++, len-- ) {\n" | + " unsigned char c = (unsigned char)*p;\n" | + " curState = cs;\n" | + "/**********************************************************************\n" | + "::Finish\n" | + " * Indicate to the fsm that the input is done. 
Does cleanup tasks.\n" | + "void \n" | + "::Finish( )\n" | + " int cs = curState;\n" | + " int accept = 0;\n" | + " this->accept = accept;\n" | + "/*******************************************************\n" | + "::Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + "::Accept( )\n" | + " return accept;\n" | + "10SelCodeGen\n" | + "11CSelCodeGen\n" | + "12CCSelCodeGen\n" | + "10FsmCodeGen\n" | + "bad_alloc\n" | + "goto tr\n" | + "goto st\n" | + "goto err;\n" | + " case \n" | + "break;}\n" | + ": goto st\n" | + " case \n" | + " default: return;\n" | + " goto st\n" | + " if ( --len == 0 )\n" | + " goto out\n" | + " switch( (alph) *++p ) {\n" | + "case \n" | + " default: \n" | + " return;\n" | + "curState = \n" | + " switch( cs ) {\n" | + "accept = 1; \n" | + "break;\n" | + "err:\n" | + "curState = -1;\n" | + ", p );\n" | + "ExecFuncs( fsm, f+\n" | + "fsm->\n" | + "/* Only non-static data: current state. */\n" | + "struct \n" | + "Struct\n" | + " int curState;\n" | + " int accept;\n" | + "typedef struct \n" | + "Struct \n" | + "/* Init the fsm. */\n" | + "void \n" | + "Init( \n" | + " *fsm );\n" | + "/* Execute some chunk of data. */\n" | + "void \n" | + "Execute( \n" | + " *fsm, char *data, int dlen );\n" | + "/* Indicate to the fsm tha there is no more data. */\n" | + "void \n" | + "Finish( \n" | + " *fsm );\n" | + "/* Did the machine accept? */\n" | + "int \n" | + "Accept( \n" | + " *fsm );\n" | + "/* The start state. */\n" | + "static int \n" | + "_startState = \n" | + "#define f \n" | + "#define alph unsigned char\n" | + "/* The array of functions. */\n" | + "#if \n" | + "static int \n" | + "_f[] = {\n" | + "#endif\n" | + "/****************************************\n" | + "Init\n" | + "void \n" | + " *fsm )\n" | + " fsm->curState = \n" | + "_startState;\n" | + " fsm->accept = 0;\n" | + "/***************************************************************************\n" | + " * Function exection. 
We do not inline this as in tab\n" | + " * code gen because if we did, we might as well just expand \n" | + " * the function as in the faster goto code generator.\n" | + "static void \n" | + "ExecFuncs( \n" | + " *fsm, int *funcs, char *p )\n" | + " int len = *funcs++;\n" | + " while ( len-- > 0 ) {\n" | + " switch ( *funcs++ ) {\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + " *fsm, char *data, int dlen )\n" | + " /* Prime these to one back to simulate entering the \n" | + " * machine on a transition. */ \n" | + " register char *p = data - 1;\n" | + " register int len = dlen + 1;\n" | + " /* Switch statment to enter the machine. */\n" | + " switch ( \n" | + "curState ) {\n" | + "/**********************************************************************\n" | + " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" | + "void \n" | + " *fsm )\n" | + " int cs = fsm->curState;\n" | + " int accept = 0;\n" | + " fsm->accept = accept;\n" | + "/*******************************************************\n" | + " * Did the machine accept?\n" | + "int \n" | + " *fsm )\n" | + " return fsm->accept;\n" | + "#undef f\n" | + "#undef alph\n" | + " ExecFuncs( f+\n" | + "/* Only non-static data: current state. */\n" | + "class \n" | + "public:\n" | + " /* Init the fsm. */\n" | + " void Init( );\n" | + " /* Execute some chunk of data. */\n" | + " void Execute( char *data, int dlen );\n" | + " /* Indicate to the fsm tha there is no more data. */\n" | + " void Finish( );\n" | + " /* Did the machine accept? */\n" | + " int Accept( );\n" | + " int curState;\n" | + " int accept;\n" | + " /* The start state. */\n" | + " static int startState;\n" | + " /* Function exection. We do not inline this as in tab code gen\n" | + " * because if we did, we might as well just expand the function \n" | + " * as in the faster goto code generator. 
*/\n" | + " void ExecFuncs( int *funcs, char * );\n" | + "/* The start state. */\n" | + "int \n" | + "::startState = \n" | + "/* some defines to lessen the code size. */\n" | + "#define f \n" | + "#endif\n" | + "/****************************************\n" | + " * Make sure the fsm is initted.\n" | + " Init();\n" | + "/****************************************\n" | + " * Initialize the fsm.\n" | + "void \n" | + "::Init( )\n" | + " curState = startState;\n" | + " accept = 0;\n" | + "/***************************************************************************\n" | + " * Execute functions pointed to by funcs until the null function is found. \n" | + "void \n" | + "::ExecFuncs( int *funcs, char *p )\n" | + " int len = *funcs++;\n" | + " while ( len-- > 0 ) {\n" | + " switch ( *funcs++ ) {\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + "::Execute( char *data, int dlen )\n" | + " /* Prime these to one back to simulate entering the \n" | + " * machine on a transition. */ \n" | + " register char *p = data - 1;\n" | + " register int len = dlen + 1;\n" | + " /* Switch statment to enter the machine. */\n" | + " switch ( curState ) {\n" | + "/**********************************************************************\n" | + "::Finish\n" | + " * Indicate to the fsm that the input is done. Does cleanup tasks.\n" | + "void \n" | + "::Finish( )\n" | + " int cs = curState;\n" | + " int accept = 0;\n" | + " this->accept = accept;\n" | + "/*******************************************************\n" | + "::Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + "::Accept( )\n" | + " return accept;\n" | + "#undef f\n" | + "#undef alph\n" | + "11GotoCodeGen\n" | + "12CGotoCodeGen\n" | + "13CCGotoCodeGen\n" | + "10FsmCodeGen\n" | + "bad_alloc\n" | + " case \n" | + " break;\n" | + ", p );\n" | + "ExecFuncs( fsm, \n" | + "fsm->\n" | + "/* Only non-static data: current state. 
*/\n" | + "struct \n" | + "Struct\n" | + " int curState;\n" | + " int accept;\n" | + "typedef struct \n" | + "Struct \n" | + "/* Init the fsm. */\n" | + "void \n" | + "Init( \n" | + " *fsm );\n" | + "/* Execute some chunk of data. */\n" | + "void \n" | + "Execute( \n" | + " *fsm, char *data, int dlen );\n" | + "/* Indicate to the fsm tha there is no more data. */\n" | + "void \n" | + "Finish( \n" | + " *fsm );\n" | + "/* Did the machine accept? */\n" | + "int \n" | + "Accept( \n" | + " *fsm );\n" | + "/* The start state. */\n" | + "static int \n" | + "_startState = \n" | + "/****************************************\n" | + "Init\n" | + "void \n" | + " *fsm )\n" | + " fsm->curState = \n" | + "_startState;\n" | + " fsm->accept = 0;\n" | + "/***************************************************************************\n" | + " * Function exection. We do not inline this as in tab\n" | + " * code gen because if we did, we might as well just expand \n" | + " * the function as in the faster goto code generator.\n" | + "static void \n" | + "ExecFuncs( \n" | + " *fsm, int func, char *p )\n" | + " switch ( func ) {\n" | + "#define alph unsigned char\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + " *fsm, char *data, int dlen )\n" | + " /* Prime these to one back to simulate entering the \n" | + " * machine on a transition. */ \n" | + " register char *p = data-1;\n" | + " register int len = dlen+1;\n" | + " /* Switch statment to enter the machine. */\n" | + " switch ( \n" | + "curState ) {\n" | + "/**********************************************************************\n" | + "Finish\n" | + " * Indicate to the fsm that the input is done. 
Does cleanup tasks.\n" | + "void \n" | + " *fsm )\n" | + " int cs = fsm->curState;\n" | + " int accept = 0;\n" | + " fsm->accept = accept;\n" | + "/*******************************************************\n" | + "Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + " *fsm )\n" | + " return fsm->accept;\n" | + "#undef alph\n" | + " ExecFuncs( \n" | + "/* Only non-static data: current state. */\n" | + "class \n" | + "public:\n" | + " /* Init the fsm. */\n" | + " void Init( );\n" | + " /* Execute some chunk of data. */\n" | + " void Execute( char *data, int dlen );\n" | + " /* Indicate to the fsm tha there is no more data. */\n" | + " void Finish( );\n" | + " /* Did the machine accept? */\n" | + " int Accept( );\n" | + " int curState;\n" | + " int accept;\n" | + " /* The start state. */\n" | + " static int startState;\n" | + " /* Function exection. We do not inline this as in tab code gen\n" | + " * because if we did, we might as well just expand the function \n" | + " * as in the faster goto code generator. */\n" | + " void ExecFuncs( int func, char *p );\n" | + "/* The start state. */\n" | + "int \n" | + "::startState = \n" | + " Init();\n" | + "/****************************************\n" | + "::Init\n" | + "void \n" | + "::Init( )\n" | + " curState = startState;\n" | + " accept = 0;\n" | + "/***************************************************************************\n" | + " * Execute functions pointed to by funcs until the null function is found. \n" | + "void \n" | + "::ExecFuncs( int func, char *p )\n" | + " switch ( func ) {\n" | + "::Execute( char *data, int dlen )\n" | + " /* Prime these to one back to simulate entering the \n" | + " * machine on a transition. */ \n" | + " register char *p = data-1;\n" | + " register int len = dlen+1;\n" | + " /* Switch statment to enter the machine. */\n" | + " switch ( curState ) {\n" | + "::Finish\n" | + " * Indicate to the fsm that the input is done. 
Does cleanup tasks.\n" | + "void \n" | + "::Finish( )\n" | + " int cs = curState;\n" | + " int accept = 0;\n" | + " this->accept = accept;\n" | + "/*******************************************************\n" | + "::Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + "::Accept( )\n" | + " return accept;\n" | + "#undef alph\n" | + "12FGotoCodeGen\n" | + "13CFGotoCodeGen\n" | + "14CCFGotoCodeGen\n" | + "11GotoCodeGen\n" | + "10FsmCodeGen\n" | + "bad_alloc\n" | + "fsm->\n" | + "/* Only non-static data: current state. */\n" | + "struct \n" | + "Struct\n" | + " int curState;\n" | + " int accept;\n" | + "typedef struct \n" | + "Struct \n" | + "/* Init the fsm. */\n" | + "void \n" | + "Init( \n" | + " *fsm );\n" | + "/* Execute some chunk of data. */\n" | + "void \n" | + "Execute( \n" | + " *fsm, char *data, int dlen );\n" | + "/* Indicate to the fsm tha there is no more data. */\n" | + "void \n" | + "Finish( \n" | + " *fsm );\n" | + "/* Did the machine accept? */\n" | + "int \n" | + "Accept( \n" | + " *fsm );\n" | + "/* The start state. */\n" | + "static int \n" | + "_startState = \n" | + "/****************************************\n" | + "Init\n" | + "void \n" | + " *fsm )\n" | + " fsm->curState = \n" | + "_startState;\n" | + " fsm->accept = 0;\n" | + "#define alph unsigned char\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + " *fsm, char *data, int dlen )\n" | + " /* Prime these to one back to simulate entering the \n" | + " * machine on a transition. */ \n" | + " register char *p = data-1;\n" | + " register int len = dlen+1;\n" | + " /* Switch statment to enter the machine. */\n" | + " switch ( \n" | + "curState ) {\n" | + "/**********************************************************************\n" | + "Finish\n" | + " * Indicate to the fsm that the input is done. 
Does cleanup tasks.\n" | + "void \n" | + " *fsm )\n" | + " int cs = fsm->curState;\n" | + " int accept = 0;\n" | + " fsm->accept = accept;\n" | + "/*******************************************************\n" | + "Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + " *fsm )\n" | + " return fsm->accept;\n" | + "#undef alph\n" | + "/* Only non-static data: current state. */\n" | + "class \n" | + "public:\n" | + " /* Init the fsm. */\n" | + " void Init( );\n" | + " /* Execute some chunk of data. */\n" | + " void Execute( char *data, int dlen );\n" | + " /* Indicate to the fsm tha there is no more data. */\n" | + " void Finish( );\n" | + " /* Did the machine accept? */\n" | + " int Accept( );\n" | + " int curState;\n" | + " int accept;\n" | + " /* The start state. */\n" | + " static int startState;\n" | + "/* The start state. */\n" | + "int \n" | + "::startState = \n" | + " Init();\n" | + "/****************************************\n" | + "::Init\n" | + "void \n" | + "::Init( )\n" | + " curState = startState;\n" | + " accept = 0;\n" | + "#define alph unsigned char\n" | + "/**********************************************************************\n" | + " * Execute the fsm on some chunk of data. \n" | + "void \n" | + "::Execute( char *data, int dlen )\n" | + " /* Prime these to one back to simulate entering the \n" | + " * machine on a transition. */ \n" | + " register char *p = data-1;\n" | + " register int len = dlen+1;\n" | + " /* Switch statment to enter the machine. */\n" | + " switch ( curState ) {\n" | + "::Finish\n" | + " * Indicate to the fsm that the input is done. 
Does cleanup tasks.\n" | + "void \n" | + "::Finish( )\n" | + " int cs = curState;\n" | + " int accept = 0;\n" | + " this->accept = accept;\n" | + "/*******************************************************\n" | + "::Accept\n" | + " * Did the machine accept?\n" | + "int \n" | + "::Accept( )\n" | + " return accept;\n" | + "#undef alph\n" | + "13IpGotoCodeGen\n" | + "14CIpGotoCodeGen\n" | + "15CCIpGotoCodeGen\n" | + "11GotoCodeGen\n" | + "10FsmCodeGen\n"; +}%% + +%% write data; +struct strs the_fsm; + +void test( char *buf ) +{ + struct strs *fsm = &the_fsm; + char *p = buf; + char *pe = buf + strlen( buf ); + + %% write init; + %% write exec; + + if ( fsm->cs >= strs_first_final ) + printf("ACCEPT\n"); + else + printf("FAIL\n"); +} + + +int main() +{ + test( "stdin\n" ); + test( "bad_alloc\n" ); + test( "_GLOBAL_OFFSET_TABLE_\n" ); + test( "not in\n" ); + test( + "isatty\n" + "junk on end.\n" + ); + + return 0; +} + +#ifdef _____OUTPUT_____ +ACCEPT +ACCEPT +ACCEPT +FAIL +FAIL +#endif diff --git a/test/testcase.txl b/test/testcase.txl new file mode 100644 index 0000000..65d7912 --- /dev/null +++ b/test/testcase.txl @@ -0,0 +1,192 @@ +comments + '# +end comments + +tokens + union "\[[(\\\c)#\]]*\]" +end tokens + +compounds + '%% '%%{ '}%% '== ':= '-> '<> '>= '<= '=> + '|* '*| + '>! '<! '$! '%! '@! '<>! 
+ '>/ '</ '$/ '%/ '@/ '<>/ +end compounds + +keys + 'int 'bool 'true 'false 'char 'ptr + 'if 'else 'printi 'prints 'printb 'print_token + 'fc 'fpc 'fbreak 'fgoto 'fcall 'fret 'fhold 'fexec + 'machine 'alphtype 'action + 'first_token_char +end keys + +define lang_indep + [al_statements] + '%% [NL] + [al_statements] + [ragel_def] +end define + +define ragel_def + '%%{ [NL] [IN] + [ragel_program] + [EX] '}%% [NL] +end define + +define ragel_program + [repeat statement] +end define + +define statement + [machine_stmt] + | [alphtype_stmt] + | [action_stmt] + | [cond_action_stmt] + | [machine_def] + | [machine_inst] +end define + +define machine_stmt + 'machine [id] '; [NL] +end define + +define alphtype_stmt + 'alphtype [repeat id] '; [NL] +end define + +define action_stmt + 'action [id] [al_host_block] +end define + +define cond_action_stmt + 'action [id] '{ [al_expr] '} [NL] +end define + +define al_statements + [repeat action_lang_stmt] +end define + +define action_lang_stmt + [al_ragel_stmt] + | [al_variable_decl] + | [al_expr_stmt] + | [al_if_stmt] + | [al_print_stmt] + | '{ [al_statements] '} +end define + +define al_print_stmt + [print_cmd] [al_expr] '; [NL] + | 'print_token '; [NL] +end define + +define print_cmd + 'printi | 'prints | 'printb +end define + +define al_variable_decl + [al_type_decl] [id] [opt union] '; [NL] +end define + +define al_array_decl + '[ [number] '] +end define + +define al_type_decl + 'int | 'bool | 'char | 'ptr +end define + +define al_expr_stmt + [al_expr] '; [NL] +end define + +define al_expr + [al_term] [repeat al_expr_extend] +end define + +define al_expr_extend + [al_expr_op] [al_term] +end define + +define al_expr_op + '= | '+ | '- | '* | '/ | '== | '<= | '>= | '< | '> +end define + +define al_term + [al_term_base] [opt union] +end define + +define al_term_base + [id] + | [SPOFF] [id] '( [SPON] [al_expr] ') + | [opt al_sign] [number] + | [stringlit] + | [charlit] + | 'fc + | 'true + | 'false + | '( [al_expr] ') + | '< [SPOFF] 
[al_type_decl] '> '( [SPON] [al_expr] ') + | 'first_token_char +end define + +define al_sign + '- | '+ +end define + +define al_if_stmt + 'if '( [al_expr] ') [NL] [IN] + [action_lang_stmt] [EX] + [opt al_else] +end define + +define al_else + 'else [NL] [IN] + [action_lang_stmt] [EX] +end define + +define al_ragel_stmt + 'fbreak '; [NL] + | 'fhold '; [NL] + | 'fexec [repeat al_expr] '; [NL] + | 'fnext [id] '; [NL] + | 'fgoto [id] '; [NL] + | 'fcall [id] '; [NL] + | 'fnext '* [repeat al_expr] '; [NL] + | 'fgoto '* [repeat al_expr] '; [NL] + | 'fcall '* [repeat al_expr] '; [NL] + | 'fret '; [NL] +end define + +define machine_def + [id] '= [machine_expr] '; [NL] +end define + +define machine_inst + [id] ':= [machine_expr] '; [NL] +end define + +define machine_expr + [repeat machine_expr_item] +end define + +define scanner_item + [repeat machine_expr_item] '; [NL] +end define + +define machine_expr_item + [action_embed] [al_host_block] + | '|* [repeat scanner_item] '*| + | [not ';] [not '*|] [token] +end define + +define al_host_block + '{ [NL] [IN] [al_statements] [EX] '} [NL] +end define + +define action_embed + '> | '$ | '@ | '% | + '$! | '=> +end define + diff --git a/test/tokstart1.rl b/test/tokstart1.rl new file mode 100644 index 0000000..e8c1552 --- /dev/null +++ b/test/tokstart1.rl @@ -0,0 +1,238 @@ +/* + * @LANG: c++ + */ + +#include <iostream> +#include <string.h> +using namespace std; + +extern char buf[]; + +struct Scanner +{ + int cs, act; + char *ts, *te; + + // Initialize the machine. Invokes any init statement blocks. Declared + // void, so no status is returned to the caller. + void init( ); + + // Execute the machine on a block of data. Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. 
+ int execute( char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +%%{ + machine Scanner; + + action to_act { + cout << "to: fc = "; + if ( fc == '\'' ) + cout << (int)fc; + else + cout << fc; + cout << " ts = " << ( ts == 0 ? -1 : ts-buf ) << endl; + } + action from_act { + cout << "from: fc = "; + if ( fc == '\'' ) + cout << (int)fc; + else + cout << fc; + cout << " ts = " << ( ts == 0 ? -1 : ts-buf ) << endl; + } + + c_comm := ( any* $0 '*/' @1 @{ fgoto main; } ) $~to_act $*from_act; + cxx_comm := ( any* $0 '\n' @1 @{ fgoto main; } ) $~to_act $*from_act; + + main := |* + + # Single and double literals. + ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) $~ to_act $* from_act; + ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) $~ to_act $* from_act; + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) $~ to_act $* from_act; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + + ( fract_const exponent? float_suffix? | + digit+ exponent float_suffix? ) $~ to_act $* from_act; + + # Integer decimal. Leading part buffered by float. + ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) $~ to_act $* from_act; + + # Integer octal. Leading part buffered by float. + ( '0' [0-9]+ [ulUL]{0,2} ) $~ to_act $* from_act; + + # Integer hex. Leading 0 buffered by float. + ( '0x' [0-9a-fA-F]+ [ulUL]{0,2} ) $~ to_act $* from_act; + + # Three char compounds, first item already buffered. + ( '...' ) $~ to_act $* from_act; + + # Single char symbols. + ( punct - [_"'] ) $~ to_act $* from_act; + + # Comments and whitespace. 
+ ( '/*' ) $~ to_act $* from_act { fgoto c_comm; }; + ( '//' ) $~ to_act $* from_act { fgoto cxx_comm; }; + + ( any - 33..126 )+ $~ to_act $* from_act; + + *|; +}%% + +%% write data; + +void Scanner::init( ) +{ + %% write init; +} + +int Scanner::execute( char *data, int len ) +{ + char *p = data; + char *pe = data + len; + char *eof = pe; + + %% write exec; + + return 0; +} + +int Scanner::finish( ) +{ + if ( cs == Scanner_error ) + return -1; + if ( cs >= Scanner_first_final ) + return 1; + return 0; +} + +void test( ) +{ + int len = strlen( buf ); + Scanner scanner; + + scanner.init(); + scanner.execute( buf, len ); + if ( scanner.cs == Scanner_error ) { + /* Machine failed before finding a token. */ + cout << "PARSE ERROR" << endl; + } + scanner.finish(); +} + +char buf[4096]; + +int main() +{ + strcpy( buf, + "a b 0.98 /*\n" + "9 */'\\''//hi\n" + "there\n" + ); + test(); + return 0; +} + +#ifdef _____OUTPUT_____ +from: fc = a ts = 0 +to: fc = a ts = 0 +from: fc = ts = 0 +to: fc = a ts = -1 +from: fc = ts = 1 +to: fc = ts = 1 +from: fc = b ts = 1 +to: fc = ts = -1 +from: fc = b ts = 2 +to: fc = b ts = 2 +from: fc = ts = 2 +to: fc = b ts = -1 +from: fc = ts = 3 +to: fc = ts = 3 +from: fc = 0 ts = 3 +to: fc = ts = -1 +from: fc = 0 ts = 4 +to: fc = 0 ts = 4 +from: fc = . ts = 4 +to: fc = . 
ts = 4 +from: fc = 9 ts = 4 +to: fc = 9 ts = 4 +from: fc = 8 ts = 4 +to: fc = 8 ts = 4 +from: fc = ts = 4 +to: fc = 8 ts = -1 +from: fc = ts = 8 +to: fc = ts = 8 +from: fc = / ts = 8 +to: fc = ts = -1 +from: fc = / ts = 9 +to: fc = / ts = 9 +from: fc = * ts = 9 +to: fc = * ts = -1 +from: fc = + ts = -1 +to: fc = + ts = -1 +from: fc = 9 ts = -1 +to: fc = 9 ts = -1 +from: fc = ts = -1 +to: fc = ts = -1 +from: fc = * ts = -1 +to: fc = * ts = -1 +from: fc = / ts = -1 +to: fc = / ts = -1 +from: fc = 39 ts = 16 +to: fc = 39 ts = 16 +from: fc = \ ts = 16 +to: fc = \ ts = 16 +from: fc = 39 ts = 16 +to: fc = 39 ts = 16 +from: fc = 39 ts = 16 +to: fc = 39 ts = -1 +from: fc = / ts = 20 +to: fc = / ts = 20 +from: fc = / ts = 20 +to: fc = / ts = -1 +from: fc = h ts = -1 +to: fc = h ts = -1 +from: fc = i ts = -1 +to: fc = i ts = -1 +from: fc = + ts = -1 +to: fc = + ts = -1 +from: fc = t ts = 25 +to: fc = t ts = 25 +from: fc = h ts = 25 +to: fc = h ts = 25 +from: fc = e ts = 25 +to: fc = e ts = 25 +from: fc = r ts = 25 +to: fc = r ts = 25 +from: fc = e ts = 25 +to: fc = e ts = 25 +from: fc = + ts = 25 +to: fc = e ts = -1 +from: fc = + ts = 30 +to: fc = + ts = 30 +to: fc = + ts = -1 +#endif diff --git a/test/union.rl b/test/union.rl new file mode 100644 index 0000000..a3cc98f --- /dev/null +++ b/test/union.rl @@ -0,0 +1,193 @@ +/* + * @LANG: c++ + * Show off concurrent abilities. + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +using namespace std; + +#define BUFSIZE 2048 + +struct Concurrent +{ + int cur_char; + int start_word; + int start_comment; + int start_literal; + + int cs; + + // Initialize the machine. Invokes any init statement blocks. Returns 0 + // if the machine begins in a non-accepting state and 1 if the machine + // begins in an accepting state. + void init( ); + + // Execute the machine on a block of data. 
Returns -1 if after processing + // the data, the machine is in the error state and can never accept, 0 if + // the machine is in a non-accepting state and 1 if the machine is in an + // accepting state. + // NOTE: execute() is declared void below, so no status is actually + // returned by it; call finish() afterwards to obtain the -1/0/1 result. + void execute( const char *data, int len ); + + // Indicate that there is no more data. Returns -1 if the machine finishes + // in the error state and does not accept, 0 if the machine finishes + // in any other non-accepting state and 1 if the machine finishes in an + // accepting state. + int finish( ); +}; + +%%{ + machine Concurrent; + + action next_char { + cur_char += 1; + } + + action start_word { + start_word = cur_char; + } + action end_word { + cout << "word: " << start_word << + " " << cur_char-1 << endl; + } + + action start_comment { + start_comment = cur_char; + } + action end_comment { + cout << "comment: " << start_comment << + " " << cur_char-1 << endl; + } + + action start_literal { + start_literal = cur_char; + } + action end_literal { + cout << "literal: " << start_literal << + " " << cur_char-1 << endl; + } + + # Count characters. + chars = ( any @next_char )*; + + # Words are non-whitespace. + word = ( any-space )+ >start_word %end_word; + words = ( ( word | space ) $1 %0 )*; + + # Finds C style comments. + comment = ( '/*' any* $0 '*/'@1 ) >start_comment %end_comment; + comments = ( ( comment | any ) $1 %0 )*; + + # Finds single quoted strings. + literalChar = ( any - ['\\] ) | ( '\\' . 
any ); + literal = ('\'' literalChar* '\'' ) >start_literal %end_literal; + literals = ( ( literal | (any-'\'') ) $1 %0 )*; + + main := chars | words | comments | literals; +}%% + +%% write data; + +/* Zero the position counters, then emit the generated machine init. */ +void Concurrent::init( ) +{ + cur_char = 0; + start_word = 0; + start_comment = 0; + start_literal = 0; + %% write init; +} + +/* Run the machine over data[0..len) with eof set to the end of the buffer. + * Nothing is returned; finish() reports the outcome. */ +void Concurrent::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + const char *eof = pe; + + %% write exec; +} + +/* Report the current state: -1 for the error state, 1 for a final state, + * 0 otherwise. */ +int Concurrent::finish( ) +{ + if ( cs == Concurrent_error ) + return -1; + if ( cs >= Concurrent_first_final ) + return 1; + return 0; +} + +/* Run buf through a fresh machine and print ACCEPT when finish() is + * positive, FAIL otherwise. */ +void test( const char *buf ) +{ + Concurrent concurrent; + concurrent.init(); + concurrent.execute( buf, strlen(buf) ); + if ( concurrent.finish() > 0 ) + cout << "ACCEPT" << endl; + else + cout << "FAIL" << endl; +} + +int main() +{ + test( + "/* in a comment,\n" + " * ' and now in a literal string\n" + " */ \n" + " \n" + "the comment has now ended but the literal string lives on\n" + "\n" + "' comment closed\n" ); + test( "/* * ' \\' */ \\' '\n" ); + test( "/**/'\\''/*/*/\n" ); + return 0; +} + +#ifdef _____OUTPUT_____ +word: 1 2 +word: 4 5 +word: 7 7 +word: 9 16 +word: 19 19 +word: 21 21 +word: 23 25 +word: 27 29 +word: 31 32 +word: 34 34 +word: 36 42 +word: 44 49 +word: 52 53 +comment: 1 53 +word: 58 60 +word: 62 68 +word: 70 72 +word: 74 76 +word: 78 82 +word: 84 86 +word: 88 90 +word: 92 98 +word: 100 105 +word: 107 111 +word: 113 114 +word: 117 117 +literal: 21 117 +word: 119 125 +word: 127 132 +ACCEPT +word: 1 2 +word: 4 4 +word: 6 6 +word: 8 9 +word: 11 12 +comment: 1 12 +word: 14 15 +word: 17 17 +literal: 6 17 +ACCEPT +comment: 1 4 +literal: 5 8 +word: 1 13 +comment: 9 13 +ACCEPT +#endif diff --git a/test/xml.rl b/test/xml.rl new file mode 100644 index 0000000..3a76400 --- /dev/null +++ b/test/xml.rl @@ -0,0 +1,107 @@ +/* + * XML parser based on the XML 1.0 BNF from: + * http://www.jelks.nu/XML/xmlebnf.html + * + * @LANG: c++ + * 
@ALLOW_MINFLAGS: -l -e + * @ALLOW_GENFLAGS: -T0 -T1 + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +#define BUFSIZE 2048 + +struct XML +{ + int cur_char; + int start_word; + int start_comment; + int start_literal; + + int cs, top, stack[1024]; + + int init( ); + int execute( const unsigned short *data, int len ); + int finish( ); +}; + +%%{ + machine XML; + alphtype unsigned short; + + action next_char { + cur_char += 1; + } + + action start_word { + start_word = cur_char; + } + action end_word { + cout << "word: " << start_word << + " " << cur_char-1 << endl; + } + + Extender = 0x00B7 | 0x02D0 | 0x02D1 | 0x0387 | 0x0640 | 0x0E46 | 0x0EC6 | 0x3005 | (0x3031..0x3035) | (0x309D..0x309E) | (0x30FC..0x30FE); + + Digit = (0x0030..0x0039) | (0x0660..0x0669) | (0x06F0..0x06F9) | (0x0966..0x096F) | (0x09E6..0x09EF) | (0x0A66..0x0A6F) | (0x0AE6..0x0AEF) | (0x0B66..0x0B6F) | (0x0BE7..0x0BEF) | (0x0C66..0x0C6F) | (0x0CE6..0x0CEF) | (0x0D66..0x0D6F) | (0x0E50..0x0E59) | (0x0ED0..0x0ED9) | (0x0F20..0x0F29); + + CombiningChar = (0x0300..0x0345) | (0x0360..0x0361) | (0x0483..0x0486) | (0x0591..0x05A1) | (0x05A3..0x05B9) | (0x05BB..0x05BD) | 0x05BF | (0x05C1..0x05C2) | 0x05C4 | (0x064B..0x0652) | 0x0670 | (0x06D6..0x06DC) | (0x06DD..0x06DF) | (0x06E0..0x06E4) | (0x06E7..0x06E8) | (0x06EA..0x06ED) | (0x0901..0x0903) | 0x093C | (0x093E..0x094C) | 0x094D | (0x0951..0x0954) | (0x0962..0x0963) | (0x0981..0x0983) | 0x09BC | 0x09BE | 0x09BF | (0x09C0..0x09C4) | (0x09C7..0x09C8) | (0x09CB..0x09CD) | 0x09D7 | (0x09E2..0x09E3) | 0x0A02 | 0x0A3C | 0x0A3E | 0x0A3F | (0x0A40..0x0A42) | (0x0A47..0x0A48) | (0x0A4B..0x0A4D) | (0x0A70..0x0A71) | (0x0A81..0x0A83) | 0x0ABC | (0x0ABE..0x0AC5) | (0x0AC7..0x0AC9) | (0x0ACB..0x0ACD) | (0x0B01..0x0B03) | 0x0B3C | (0x0B3E..0x0B43) | (0x0B47..0x0B48) | (0x0B4B..0x0B4D) | (0x0B56..0x0B57) | (0x0B82..0x0B83) | (0x0BBE..0x0BC2) | (0x0BC6..0x0BC8) | (0x0BCA..0x0BCD) | 0x0BD7 | (0x0C01..0x0C03) | (0x0C3E..0x0C44) 
| (0x0C46..0x0C48) | (0x0C4A..0x0C4D) | (0x0C55..0x0C56) | (0x0C82..0x0C83) | (0x0CBE..0x0CC4) | (0x0CC6..0x0CC8) | (0x0CCA..0x0CCD) | (0x0CD5..0x0CD6) | (0x0D02..0x0D03) | (0x0D3E..0x0D43) | (0x0D46..0x0D48) | (0x0D4A..0x0D4D) | 0x0D57 | 0x0E31 | (0x0E34..0x0E3A) | (0x0E47..0x0E4E) | 0x0EB1 | (0x0EB4..0x0EB9) | (0x0EBB..0x0EBC) | (0x0EC8..0x0ECD) | (0x0F18..0x0F19) | 0x0F35 | 0x0F37 | 0x0F39 | 0x0F3E | 0x0F3F | (0x0F71..0x0F84) | (0x0F86..0x0F8B) | (0x0F90..0x0F95) | 0x0F97 | (0x0F99..0x0FAD) | (0x0FB1..0x0FB7) | 0x0FB9 | (0x20D0..0x20DC) | 0x20E1 | (0x302A..0x302F) | 0x3099 | 0x309A; + + Ideographic = (0x4E00..0x9FA5) | 0x3007 | (0x3021..0x3029); + + BaseChar = (0x0041..0x005A) | (0x0061..0x007A) | (0x00C0..0x00D6) | (0x00D8..0x00F6) | (0x00F8..0x00FF) | (0x0100..0x0131) | (0x0134..0x013E) | (0x0141..0x0148) | (0x014A..0x017E) | (0x0180..0x01C3) | (0x01CD..0x01F0) | (0x01F4..0x01F5) | (0x01FA..0x0217) | (0x0250..0x02A8) | (0x02BB..0x02C1) | 0x0386 | (0x0388..0x038A) | 0x038C | (0x038E..0x03A1) | (0x03A3..0x03CE) | (0x03D0..0x03D6) | 0x03DA | 0x03DC | 0x03DE | 0x03E0 | (0x03E2..0x03F3) | (0x0401..0x040C) | (0x040E..0x044F) | (0x0451..0x045C) | (0x045E..0x0481) | (0x0490..0x04C4) | (0x04C7..0x04C8) | (0x04CB..0x04CC) | (0x04D0..0x04EB) | (0x04EE..0x04F5) | (0x04F8..0x04F9) | (0x0531..0x0556) | 0x0559 | (0x0561..0x0586) | (0x05D0..0x05EA) | (0x05F0..0x05F2) | (0x0621..0x063A) | (0x0641..0x064A) | (0x0671..0x06B7) | (0x06BA..0x06BE) | (0x06C0..0x06CE) | (0x06D0..0x06D3) | 0x06D5 | (0x06E5..0x06E6) | (0x0905..0x0939) | 0x093D | (0x0958..0x0961) | (0x0985..0x098C) | (0x098F..0x0990) | (0x0993..0x09A8) | (0x09AA..0x09B0) | 0x09B2 | (0x09B6..0x09B9) | (0x09DC..0x09DD) | (0x09DF..0x09E1) | (0x09F0..0x09F1) | (0x0A05..0x0A0A) | (0x0A0F..0x0A10) | (0x0A13..0x0A28) | (0x0A2A..0x0A30) | (0x0A32..0x0A33) | (0x0A35..0x0A36) | (0x0A38..0x0A39) | (0x0A59..0x0A5C) | 0x0A5E | (0x0A72..0x0A74) | (0x0A85..0x0A8B) | 0x0A8D | (0x0A8F..0x0A91) | (0x0A93..0x0AA8) | (0x0AAA..0x0AB0) | 
(0x0AB2..0x0AB3) | (0x0AB5..0x0AB9) | 0x0ABD | 0x0AE0 | (0x0B05..0x0B0C) | (0x0B0F..0x0B10) | (0x0B13..0x0B28) | (0x0B2A..0x0B30) | (0x0B32..0x0B33) | (0x0B36..0x0B39) | 0x0B3D | (0x0B5C..0x0B5D) | (0x0B5F..0x0B61) | (0x0B85..0x0B8A) | (0x0B8E..0x0B90) | (0x0B92..0x0B95) | (0x0B99..0x0B9A) | 0x0B9C | (0x0B9E..0x0B9F) | (0x0BA3..0x0BA4) | (0x0BA8..0x0BAA) | (0x0BAE..0x0BB5) | (0x0BB7..0x0BB9) | (0x0C05..0x0C0C) | (0x0C0E..0x0C10) | (0x0C12..0x0C28) | (0x0C2A..0x0C33) | (0x0C35..0x0C39) | (0x0C60..0x0C61) | (0x0C85..0x0C8C) | (0x0C8E..0x0C90) | (0x0C92..0x0CA8) | (0x0CAA..0x0CB3) | (0x0CB5..0x0CB9) | 0x0CDE | (0x0CE0..0x0CE1) | (0x0D05..0x0D0C) | (0x0D0E..0x0D10) | (0x0D12..0x0D28) | (0x0D2A..0x0D39) | (0x0D60..0x0D61) | (0x0E01..0x0E2E) | 0x0E30 | (0x0E32..0x0E33) | (0x0E40..0x0E45) | (0x0E81..0x0E82) | 0x0E84 | (0x0E87..0x0E88) | 0x0E8A | 0x0E8D | (0x0E94..0x0E97) | (0x0E99..0x0E9F) | (0x0EA1..0x0EA3) | 0x0EA5 | 0x0EA7 | (0x0EAA..0x0EAB) | (0x0EAD..0x0EAE) | 0x0EB0 | (0x0EB2..0x0EB3) | 0x0EBD | (0x0EC0..0x0EC4) | (0x0F40..0x0F47) | (0x0F49..0x0F69) | (0x10A0..0x10C5) | (0x10D0..0x10F6) | 0x1100 | (0x1102..0x1103) | (0x1105..0x1107) | 0x1109 | (0x110B..0x110C) | (0x110E..0x1112) | 0x113C | 0x113E | 0x1140 | 0x114C | 0x114E | 0x1150 | (0x1154..0x1155) | 0x1159 | (0x115F..0x1161) | 0x1163 | 0x1165 | 0x1167 | 0x1169 | (0x116D..0x116E) | (0x1172..0x1173) | 0x1175 | 0x119E | 0x11A8 | 0x11AB | (0x11AE..0x11AF) | (0x11B7..0x11B8) | 0x11BA | (0x11BC..0x11C2) | 0x11EB | 0x11F0 | 0x11F9 | (0x1E00..0x1E9B) | (0x1EA0..0x1EF9) | (0x1F00..0x1F15) | (0x1F18..0x1F1D) | (0x1F20..0x1F45) | (0x1F48..0x1F4D) | (0x1F50..0x1F57) | 0x1F59 | 0x1F5B | 0x1F5D | (0x1F5F..0x1F7D) | (0x1F80..0x1FB4) | (0x1FB6..0x1FBC) | 0x1FBE | (0x1FC2..0x1FC4) | (0x1FC6..0x1FCC) | (0x1FD0..0x1FD3) | (0x1FD6..0x1FDB) | (0x1FE0..0x1FEC) | (0x1FF2..0x1FF4) | (0x1FF6..0x1FFC) | 0x2126 | (0x212A..0x212B) | 0x212E | (0x2180..0x2182) | (0x3041..0x3094) | (0x30A1..0x30FA) | (0x3105..0x312C) | (0xAC00..0xD7A3); + + # 
Full Unicode 3.1 requires: Char = 0x9 | 0xA | 0xD | (0x20..0xD7FF) | (0xE000..0xFFFD) | (0x10000..0x10FFFF); + + Char = 0x9 | 0xA | 0xD | (0x20..0xD7FF) | (0xE000..0xFFFD); + + Letter = BaseChar | Ideographic; + + NameChar = Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender; + + include CommonXml "xmlcommon.rl"; + +}%% + + %% write data; + + /* Emit the generated machine init and reset cur_char. Always returns 1. */ + int XML::init( ) + { + %% write init; + cur_char = 0; + return 1; + } + + /* Run the machine over data[0..len) and report the resulting state: + * -1 for the error state, 1 for a final state, 0 otherwise. */ + int XML::execute( const unsigned short *data, int len ) + { + const unsigned short *p = data; + const unsigned short *pe = data + len; + + %% write exec; + + if ( cs == XML_error ) + return -1; + if ( cs >= XML_first_final ) + return 1; + return 0; + } + + /* Report the current state using the same -1 / 1 / 0 convention as + * execute(). */ + int XML::finish( ) + { + if ( cs == XML_error ) + return -1; + if ( cs >= XML_first_final ) + return 1; + return 0; + } + + /* Nothing is executed here; the expected-output block that follows is + * empty as well. */ + int main() + { + return 0; + } +/* _____OUTPUT_____ +_____OUTPUT_____ */ diff --git a/test/xmlcommon.rl b/test/xmlcommon.rl new file mode 100644 index 0000000..e7a855e --- /dev/null +++ b/test/xmlcommon.rl @@ -0,0 +1,205 @@ +/* + * This file is included by xml.rl + * + * @IGNORE: yes + */ + +%%{ + + # + # Common XML grammar rules based on the XML 1.0 BNF from: + # http://www.jelks.nu/XML/xmlebnf.html + # + + machine CommonXml; + + S = (0x20 | 0x9 | 0xD | 0xA)+; + + # WAS PubidChar = 0x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]; + PubidChar = 0x20 | 0xD | 0xA | [a-zA-Z0-9] | [\-'()+,./:=?;!*#@$_%]; + + PubidLiteral = '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"; + + Name = (Letter | '_' | ':') (NameChar)*; + + Comment = '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'; + + # Used strong subtraction operator, and replaced * with +. Ragel complained since using + # * results in a machine that accepts 0 length strings, and later it's only used in an + # optional construct anyway. + # + CharData_Old = [^<&]* - ([^<&]* ']]>' [^<&]*); + CharData = [^<&]+ -- ']]>'; + + SystemLiteral = ('"' [^"]* '"') | ("'" [^']* "'"); + + Eq = S? 
'=' S?; + + VersionNum = ([a-zA-Z0-9_.:] | '-')+; + + # WAS S 'version' Eq (' VersionNum ' | " VersionNum ") - fixed quotes + VersionInfo = S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"'); + + ExternalID = 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral; + + PublicID = 'PUBLIC' S PubidLiteral; + + NotationDecl = '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'; + + EncName = [A-Za-z] ([A-Za-z0-9._] | '-')*; + + EncodingDecl = S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ); + + # UNUSED TextDecl = '<?xml' VersionInfo? EncodingDecl S? '?>'; + + NDataDecl = S 'NDATA' S Name; + + PEReference = '%' Name ';'; + + EntityRef = '&' Name ';'; + + CharRef = '&#' [0-9]+ ';' | '&0x' [0-9a-fA-F]+ ';'; + + Reference = EntityRef | CharRef; + + EntityValue = '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"; + + PEDef = EntityValue | ExternalID; + + EntityDef = EntityValue | (ExternalID NDataDecl?); + + PEDecl = '<!ENTITY' S '%' S Name S PEDef S? '>'; + + GEDecl = '<!ENTITY' S Name S EntityDef S? '>'; + + EntityDecl = GEDecl | PEDecl; + + Mixed = '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'; + + # WAS cp = (Name | choice | seq) ('?' | '*' | '+')?; + + # WAS seq = '(' S? cp ( S? ',' S? cp )* S? ')'; + + # WAS choice = '(' S? cp ( S? '|' S? cp )* S? ')'; + + # WAS children = (choice | seq) ('?' | '*' | '+')?; + + # TODO put validation for this in and make it clearer + alt = '?' | '*' | '+'; + children = '(' S? + ( ( Name alt? ) | + '(' | + ( ')' alt? ) | + [,|] | + S ) + ')' alt?; + + contentspec = 'EMPTY' | 'ANY' | Mixed | children; + + elementdecl = '<!ELEMENT' S Name S contentspec S? '>'; + + AttValue = '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"; + + Attribute = Name Eq AttValue; + + Nmtoken = (NameChar)+; + + # UNUSED Nmtokens = Nmtoken (S Nmtoken)*; + + Enumeration = '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'; + + NotationType = 'NOTATION' S '(' S? Name (S? 
'|' S? Name)* S? ')'; + + EnumeratedType = NotationType | Enumeration; + + TokenizedType = 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'; + + StringType = 'CDATA'; + + AttType = StringType | TokenizedType | EnumeratedType; + + DefaultDecl = '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue); + + AttDef = S Name S AttType S DefaultDecl; + + AttlistDecl = '<!ATTLIST' S Name AttDef* S? '>'; + + EmptyElemTag = '<' Name (S Attribute)* S? '/>'; + + ETag = '</' Name S? '>'; + + PITarget_Old = Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')); + PITarget = Name -- "xml"i; + + PI = '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'; + + markupdecl = elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment; + + doctypedecl = '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'; + + # TODO extSubsetDecl = ( markupdecl | conditionalSect | PEReference | S )*; + # UNUSED extSubsetDecl = ( markupdecl | PEReference | S )*; + + # UNUSED extSubset = TextDecl? extSubsetDecl; + + # UNUSED Ignore = Char* - (Char* ('<![' | ']]>') Char*); + + # TODO: ignoreSectContents = Ignore ('<![' ignoreSectContents ']]>' Ignore)*; + # UNUSED ignoreSectContents = Ignore ('<![' ']]>' Ignore)*; + + # UNUSED ignoreSect = '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'; + + # UNUSED includeSect = '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'; + + # UNUSED conditionalSect = includeSect | ignoreSect; + + STag = '<' Name (S Attribute)* S? 
'>'; + + CDStart = '<![CDATA['; + + CDEnd = ']]>'; + + # WAS CData = (Char* - (Char* ']]>' Char*)); + CData = (Char* -- CDEnd); + + CDSect = CDStart CData CDEnd; + + # UNUSED Subcode = ([a-z] | [A-Z])+; + + # UNUSED UserCode = ('x' | 'X') '-' ([a-z] | [A-Z])+; + + # UNUSED IanaCode = ('i' | 'I') '-' ([a-z] | [A-Z])+; + + # UNUSED ISO639Code = ([a-z] | [A-Z]) ([a-z] | [A-Z]); + + # UNUSED Langcode = ISO639Code | IanaCode | UserCode; + + # UNUSED LanguageID = Langcode ('-' Subcode)*; + + SDDecl = S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')); + + # UNUSED extPE = TextDecl? extSubsetDecl; + + Misc = Comment | PI | S; + + XMLDecl = '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'; + + prolog = XMLDecl? Misc* (doctypedecl Misc*)?; + + # UNUSED Names = Name (S Name)*; + + # Added fcall - TODO check logic is correct + # UNUSED extParsedEnt = TextDecl? @{fcall content;}; + + # TODO tag stack validation + + # WAS element = EmptyElemTag | STag content ETag + # WAS content = (element | CharData | Reference | CDSect | PI | Comment)*; + content = (EmptyElemTag | STag | ETag | CharData | Reference | CDSect | PI | Comment)*; + + # WAS document = prolog element Misc*; + document = prolog ( EmptyElemTag | ( STag content ETag ) ) Misc*; + + main := document; + +}%% |