summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r--test/Makefile.am121
-rw-r--r--test/Makefile.in579
-rw-r--r--test/accum1.lm36
-rw-r--r--test/accum2.lm38
-rw-r--r--test/accum3.lm36
-rw-r--r--test/accumbt1.lm41
-rw-r--r--test/accumbt2.lm54
-rw-r--r--test/accumbt3.lm106
-rw-r--r--test/argv1.lm8
-rw-r--r--test/argv2.lm91
-rw-r--r--test/backtrack1.lm29
-rw-r--r--test/backtrack2.lm29
-rw-r--r--test/backtrack3.lm34
-rw-r--r--test/binary1.inbin0 -> 21219 bytes
-rw-r--r--test/binary1.lm1902
-rw-r--r--test/btscan1.lm47
-rw-r--r--test/btscan2.lm42
-rw-r--r--test/call1.lm17
-rw-r--r--test/commitbt.lm109
-rw-r--r--test/concat1.lm100
-rw-r--r--test/concat2.lm98
-rw-r--r--test/construct1.lm19
-rw-r--r--test/construct2.lm14
-rw-r--r--test/construct3.lm19
-rw-r--r--test/constructex.lm44
-rw-r--r--test/context1.lm39
-rw-r--r--test/context2.lm124
-rw-r--r--test/context3.lm47
-rw-r--r--test/counting1.lm109
-rw-r--r--test/counting2.lm98
-rw-r--r--test/counting3.lm130
-rw-r--r--test/counting4.lm111
-rw-r--r--test/decl1.lm5
-rw-r--r--test/decl2.lm5
-rw-r--r--test/decl3.lm3
-rw-r--r--test/div.lm42
-rw-r--r--test/exit1.lm7
-rw-r--r--test/exit2.lm24
-rw-r--r--test/exit3.lm24
-rw-r--r--test/export1.lm16
-rw-r--r--test/factor1.lm4
-rw-r--r--test/factor2.lm4
-rw-r--r--test/factor3.lm3
-rw-r--r--test/factor4.lm3
-rw-r--r--test/factor5.lm6
-rw-r--r--test/factor6.lm12
-rw-r--r--test/forloop1.lm19
-rw-r--r--test/forloop2.lm19
-rw-r--r--test/forloop3.lm21
-rw-r--r--test/func1.lm9
-rw-r--r--test/func2.lm9
-rw-r--r--test/func3.lm40
-rw-r--r--test/generate1.lm759
-rw-r--r--test/generate2.lm214
-rw-r--r--test/heredoc.lm59
-rw-r--r--test/ifblock1.lm46
-rw-r--r--test/ignore1.lm59
-rw-r--r--test/ignore2.lm36
-rw-r--r--test/ignore3.lm53
-rw-r--r--test/ignore4.lm74
-rw-r--r--test/ignore5.lm51
-rw-r--r--test/include1.lm5
-rw-r--r--test/include1a.lmi2
-rw-r--r--test/inpush1.lm134
-rw-r--r--test/inpush1a.in2
-rw-r--r--test/inpush1b.in2
-rw-r--r--test/inpush1c.in2
-rw-r--r--test/island.lm85
-rw-r--r--test/lhs1.lm42
-rw-r--r--test/liftattrs.lm83
-rw-r--r--test/literal1.lm4
-rw-r--r--test/lookup1.lm2416
-rw-r--r--test/mailbox.lm106
-rw-r--r--test/matchex.lm41
-rw-r--r--test/maxlen.lm57
-rw-r--r--test/multiregion1.lm242
-rw-r--r--test/multiregion2.lm124
-rw-r--r--test/mutualrec.lm18
-rw-r--r--test/namespace1.lm24
-rw-r--r--test/nestedcomm.lm55
-rw-r--r--test/order1.lm115
-rw-r--r--test/order2.lm116
-rw-r--r--test/parse1.lm14
-rw-r--r--test/prints.lm17
-rw-r--r--test/pull1.lm7
-rw-r--r--test/pull2.lm8
-rw-r--r--test/ragelambig1.lm72
-rw-r--r--test/ragelambig2.lm72
-rw-r--r--test/ragelambig3.lm72
-rw-r--r--test/ragelambig4.lm76
-rw-r--r--test/rediv.lm99
-rw-r--r--test/reor1.lm27
-rw-r--r--test/reor2.lm24
-rw-r--r--test/reparse.lm26
-rw-r--r--test/repeat1.lm42
-rw-r--r--test/repeat2.lm7408
-rw-r--r--test/rhsref1.lm117
-rw-r--r--test/rubyhere.lm123
-rwxr-xr-xtest/runtests.sh244
-rw-r--r--test/scope1.lm36
-rw-r--r--test/sprintf.lm4
-rw-r--r--test/string.lm60
-rw-r--r--test/superid.lm76
-rw-r--r--test/tags1.lm93
-rw-r--r--test/tags2.lm4183
-rw-r--r--test/tags3.lm322
-rw-r--r--test/tags4.lm350
-rw-r--r--test/tcontext1.lm35
-rw-r--r--test/til.lm194
-rw-r--r--test/translate1.lm28
-rw-r--r--test/translate2.lm62
-rw-r--r--test/travs1.lm286
-rw-r--r--test/treecmp1.lm25
-rw-r--r--test/typeref1.lm33
-rw-r--r--test/typeref2.lm34
-rw-r--r--test/typeref3.lm27
-rw-r--r--test/undofrag1.lm67
-rw-r--r--test/undofrag2.lm50
-rw-r--r--test/undofrag3.lm56
-rw-r--r--test/while1.lm52
120 files changed, 24093 insertions, 0 deletions
diff --git a/test/Makefile.am b/test/Makefile.am
new file mode 100644
index 0000000..a217e83
--- /dev/null
+++ b/test/Makefile.am
@@ -0,0 +1,121 @@
+TESTS = runtests.sh
+
+EXTRA_DIST = \
+ runtests.sh \
+ accum1.lm \
+ accum2.lm \
+ accum3.lm \
+ accumbt1.lm \
+ accumbt2.lm \
+ accumbt3.lm \
+ argv1.lm \
+ argv2.lm \
+ backtrack1.lm \
+ backtrack2.lm \
+ backtrack3.lm \
+ binary1.lm \
+ binary1.in \
+ btscan1.lm \
+ btscan2.lm \
+ call1.lm \
+ commitbt.lm \
+ concat1.lm \
+ concat2.lm \
+ construct1.lm \
+ construct2.lm \
+ construct3.lm \
+ constructex.lm \
+ context1.lm \
+ context2.lm \
+ context3.lm \
+ counting1.lm \
+ counting2.lm \
+ counting3.lm \
+ counting4.lm \
+ decl1.lm \
+ decl2.lm \
+ decl3.lm \
+ div.lm \
+ exit1.lm \
+ exit2.lm \
+ exit3.lm \
+ export1.lm \
+ factor1.lm \
+ factor2.lm \
+ factor3.lm \
+ factor4.lm \
+ factor5.lm \
+ factor6.lm \
+ forloop1.lm \
+ forloop2.lm \
+ forloop3.lm \
+ func1.lm \
+ func2.lm \
+ func3.lm \
+ generate1.lm \
+ generate2.lm \
+ heredoc.lm \
+ ifblock1.lm \
+ ignore1.lm \
+ ignore2.lm \
+ ignore3.lm \
+ ignore4.lm \
+ ignore5.lm \
+ include1.lm \
+ include1a.lmi \
+ inpush1.lm \
+ inpush1a.in \
+ inpush1b.in \
+ inpush1c.in \
+ island.lm \
+ lhs1.lm \
+ liftattrs.lm \
+ literal1.lm \
+ lookup1.lm \
+ mailbox.lm \
+ matchex.lm \
+ maxlen.lm \
+ multiregion1.lm \
+ multiregion2.lm \
+ mutualrec.lm \
+ namespace1.lm \
+ nestedcomm.lm \
+ order1.lm \
+ order2.lm \
+ parse1.lm \
+ prints.lm \
+ pull1.lm \
+ pull2.lm \
+ ragelambig1.lm \
+ ragelambig2.lm \
+ ragelambig3.lm \
+ ragelambig4.lm \
+ rediv.lm \
+ reor1.lm \
+ reor2.lm \
+ reparse.lm \
+ repeat1.lm \
+ repeat2.lm \
+ rhsref1.lm \
+ rubyhere.lm \
+ scope1.lm \
+ sprintf.lm \
+ string.lm \
+ superid.lm \
+ tags1.lm \
+ tags2.lm \
+ tags3.lm \
+ tags4.lm \
+ tcontext1.lm \
+ til.lm \
+ translate1.lm \
+ translate2.lm \
+ travs1.lm \
+ treecmp1.lm \
+ typeref1.lm \
+ typeref2.lm \
+ typeref3.lm \
+ undofrag1.lm \
+ undofrag2.lm \
+ undofrag3.lm \
+ while1.lm
diff --git a/test/Makefile.in b/test/Makefile.in
new file mode 100644
index 0000000..f8e9651
--- /dev/null
+++ b/test/Makefile.in
@@ -0,0 +1,579 @@
+# Makefile.in generated by automake 1.11.3 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = test
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/src/config.h \
+ $(top_builddir)/src/defs.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+SOURCES =
+DIST_SOURCES =
+am__tty_colors = \
+red=; grn=; lgn=; blu=; std=
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PUBDATE = @PUBDATE@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+TESTS = runtests.sh
+EXTRA_DIST = \
+ runtests.sh \
+ accum1.lm \
+ accum2.lm \
+ accum3.lm \
+ accumbt1.lm \
+ accumbt2.lm \
+ accumbt3.lm \
+ argv1.lm \
+ argv2.lm \
+ backtrack1.lm \
+ backtrack2.lm \
+ backtrack3.lm \
+ binary1.lm \
+ binary1.in \
+ btscan1.lm \
+ btscan2.lm \
+ call1.lm \
+ commitbt.lm \
+ concat1.lm \
+ concat2.lm \
+ construct1.lm \
+ construct2.lm \
+ construct3.lm \
+ constructex.lm \
+ context1.lm \
+ context2.lm \
+ context3.lm \
+ counting1.lm \
+ counting2.lm \
+ counting3.lm \
+ counting4.lm \
+ decl1.lm \
+ decl2.lm \
+ decl3.lm \
+ div.lm \
+ exit1.lm \
+ exit2.lm \
+ exit3.lm \
+ export1.lm \
+ factor1.lm \
+ factor2.lm \
+ factor3.lm \
+ factor4.lm \
+ factor5.lm \
+ factor6.lm \
+ forloop1.lm \
+ forloop2.lm \
+ forloop3.lm \
+ func1.lm \
+ func2.lm \
+ func3.lm \
+ generate1.lm \
+ generate2.lm \
+ heredoc.lm \
+ ifblock1.lm \
+ ignore1.lm \
+ ignore2.lm \
+ ignore3.lm \
+ ignore4.lm \
+ ignore5.lm \
+ include1.lm \
+ include1a.lmi \
+ inpush1.lm \
+ inpush1a.in \
+ inpush1b.in \
+ inpush1c.in \
+ island.lm \
+ lhs1.lm \
+ liftattrs.lm \
+ literal1.lm \
+ lookup1.lm \
+ mailbox.lm \
+ matchex.lm \
+ maxlen.lm \
+ multiregion1.lm \
+ multiregion2.lm \
+ mutualrec.lm \
+ namespace1.lm \
+ nestedcomm.lm \
+ order1.lm \
+ order2.lm \
+ parse1.lm \
+ prints.lm \
+ pull1.lm \
+ pull2.lm \
+ ragelambig1.lm \
+ ragelambig2.lm \
+ ragelambig3.lm \
+ ragelambig4.lm \
+ rediv.lm \
+ reor1.lm \
+ reor2.lm \
+ reparse.lm \
+ repeat1.lm \
+ repeat2.lm \
+ rhsref1.lm \
+ rubyhere.lm \
+ scope1.lm \
+ sprintf.lm \
+ string.lm \
+ superid.lm \
+ tags1.lm \
+ tags2.lm \
+ tags3.lm \
+ tags4.lm \
+ tcontext1.lm \
+ til.lm \
+ translate1.lm \
+ translate2.lm \
+ travs1.lm \
+ treecmp1.lm \
+ typeref1.lm \
+ typeref2.lm \
+ typeref3.lm \
+ undofrag1.lm \
+ undofrag2.lm \
+ undofrag3.lm \
+ while1.lm
+
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign test/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign test/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+
+check-TESTS: $(TESTS)
+ @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+ srcdir=$(srcdir); export srcdir; \
+ list=' $(TESTS) '; \
+ $(am__tty_colors); \
+ if test -n "$$list"; then \
+ for tst in $$list; do \
+ if test -f ./$$tst; then dir=./; \
+ elif test -f $$tst; then dir=; \
+ else dir="$(srcdir)/"; fi; \
+ if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+ *[\ \ ]$$tst[\ \ ]*) \
+ xpass=`expr $$xpass + 1`; \
+ failed=`expr $$failed + 1`; \
+ col=$$red; res=XPASS; \
+ ;; \
+ *) \
+ col=$$grn; res=PASS; \
+ ;; \
+ esac; \
+ elif test $$? -ne 77; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+ *[\ \ ]$$tst[\ \ ]*) \
+ xfail=`expr $$xfail + 1`; \
+ col=$$lgn; res=XFAIL; \
+ ;; \
+ *) \
+ failed=`expr $$failed + 1`; \
+ col=$$red; res=FAIL; \
+ ;; \
+ esac; \
+ else \
+ skip=`expr $$skip + 1`; \
+ col=$$blu; res=SKIP; \
+ fi; \
+ echo "$${col}$$res$${std}: $$tst"; \
+ done; \
+ if test "$$all" -eq 1; then \
+ tests="test"; \
+ All=""; \
+ else \
+ tests="tests"; \
+ All="All "; \
+ fi; \
+ if test "$$failed" -eq 0; then \
+ if test "$$xfail" -eq 0; then \
+ banner="$$All$$all $$tests passed"; \
+ else \
+ if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
+ banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+ fi; \
+ else \
+ if test "$$xpass" -eq 0; then \
+ banner="$$failed of $$all $$tests failed"; \
+ else \
+ if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
+ banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+ fi; \
+ fi; \
+ dashes="$$banner"; \
+ skipped=""; \
+ if test "$$skip" -ne 0; then \
+ if test "$$skip" -eq 1; then \
+ skipped="($$skip test was not run)"; \
+ else \
+ skipped="($$skip tests were not run)"; \
+ fi; \
+ test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+ dashes="$$skipped"; \
+ fi; \
+ report=""; \
+ if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+ report="Please report to $(PACKAGE_BUGREPORT)"; \
+ test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+ dashes="$$report"; \
+ fi; \
+ dashes=`echo "$$dashes" | sed s/./=/g`; \
+ if test "$$failed" -eq 0; then \
+ col="$$grn"; \
+ else \
+ col="$$red"; \
+ fi; \
+ echo "$${col}$$dashes$${std}"; \
+ echo "$${col}$$banner$${std}"; \
+ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+ test -z "$$report" || echo "$${col}$$report$${std}"; \
+ echo "$${col}$$dashes$${std}"; \
+ test "$$failed" -eq 0; \
+ else :; fi
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+ $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: check-am install-am install-strip
+
+.PHONY: all all-am check check-TESTS check-am clean clean-generic \
+ clean-libtool distclean distclean-generic distclean-libtool \
+ distdir dvi dvi-am html html-am info info-am install \
+ install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ uninstall uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/test/accum1.lm b/test/accum1.lm
new file mode 100644
index 0000000..f4aa963
--- /dev/null
+++ b/test/accum1.lm
@@ -0,0 +1,36 @@
+##### LM #####
+lex
+ ignore /space+/
+ literal `* `( `)
+ token id /[a-zA-Z_]+/
+end
+
+def item
+ [id]
+| [`( item* `)]
+
+def start
+ [item*]
+
+parse Input: start[ stdin ]
+
+cons Output: accum<start> []
+
+for Id: id in Input {
+ send Output
+ "( [^Id] )
+}
+
+S: start = Output()
+
+print( S )
+##### IN #####
+a b c ( chocolate fudge ) d e
+##### EXP #####
+( a )
+( b )
+( c )
+( chocolate )
+( fudge )
+( d )
+( e )
diff --git a/test/accum2.lm b/test/accum2.lm
new file mode 100644
index 0000000..05e377b
--- /dev/null
+++ b/test/accum2.lm
@@ -0,0 +1,38 @@
+##### LM #####
+context ctx
+
+i: int
+j: int
+k: int
+
+lex
+ ignore /space+/
+ literal `* `( `)
+ token id /[a-zA-Z_]+/
+end
+
+def foo [id]
+
+def item
+ [id]
+| [foo]
+| [`( item* `)]
+ {
+ i = 0
+ }
+
+def start
+ [item*]
+
+end # ctx
+
+
+cons SP: parser<ctx::start> []
+SP.ctx = cons ctx []
+send SP [stdin]
+Input: ctx::start = SP()
+print( Input )
+##### IN #####
+a b c ( d e f )
+##### EXP #####
+a b c ( d e f )
diff --git a/test/accum3.lm b/test/accum3.lm
new file mode 100644
index 0000000..956bb3f
--- /dev/null
+++ b/test/accum3.lm
@@ -0,0 +1,36 @@
+##### LM #####
+lex
+ literal `-
+ token file /^('-'|0)(^0)*/
+end
+
+token word /(^0)+/
+token zero /0/
+
+lex
+ token single /[qvh]/
+ token with_opt /[oi]/
+end
+
+def item
+ [file zero]
+| [`- single* zero]
+| [`- with_opt zero? word zero]
+
+def args
+ [word zero item*]
+
+cons ArgParser: parser<args> []
+
+ArgV: list<str> ArgV = argv
+for A: str in ArgV
+ send ArgParser [A '\0']
+
+Args: args = ArgParser()
+
+print_xml( Args )
+print( '\n' )
+##### ARGS #####
+-qv -h -o output sdf -i eth0 file
+##### EXP #####
+<args><word>./working/accum3</word><zero>&#0;</zero><_repeat_item><item><_literal_0001>-</_literal_0001><_repeat_single><single>q</single><single>v</single></_repeat_single><zero>&#0;</zero></item><item><_literal_0001>-</_literal_0001><_repeat_single><single>h</single></_repeat_single><zero>&#0;</zero></item><item><_literal_0001>-</_literal_0001><with_opt>o</with_opt><_opt_zero><zero>&#0;</zero></_opt_zero><word>output</word><zero>&#0;</zero></item><item><file>sdf</file><zero>&#0;</zero></item><item><_literal_0001>-</_literal_0001><with_opt>i</with_opt><_opt_zero><zero>&#0;</zero></_opt_zero><word>eth0</word><zero>&#0;</zero></item><item><file>file</file><zero>&#0;</zero></item></_repeat_item></args>
diff --git a/test/accumbt1.lm b/test/accumbt1.lm
new file mode 100644
index 0000000..77d56f5
--- /dev/null
+++ b/test/accumbt1.lm
@@ -0,0 +1,41 @@
+##### LM #####
+
+# Token names.
+lex
+ token number /[0-9]+/
+ token id /[a-z]+/
+ token string /'"' [^"]* '"'/
+ ignore ws / [ \t\n]+ /
+end
+
+def prefix [id]
+
+def choice1
+ [number number]
+| [number]
+
+def choice2
+ [string id]
+| [number number]
+| [id number]
+| [number]
+
+def start
+ [prefix choice1 choice2 string id id]
+
+cons I: parser<start> []
+
+send I " id "
+send I " 77 "
+send I " 88 "
+send I " \"hello\" "
+send I " dude "
+send I " dude\n"
+
+S: start = I()
+S = match S
+ ~id 77 88 "hello" dude dude
+print_xml( S )
+print( '\n' )
+##### EXP #####
+<start><prefix><id>id</id></prefix><choice1><number>77</number></choice1><choice2><number>88</number></choice2><string>"hello"</string><id>dude</id><id>dude</id></start>
diff --git a/test/accumbt2.lm b/test/accumbt2.lm
new file mode 100644
index 0000000..6d21c30
--- /dev/null
+++ b/test/accumbt2.lm
@@ -0,0 +1,54 @@
+##### LM #####
+context accum_bt
+
+lex
+ ignore /[ \t\n]+/
+ token id1 /[a-zA-Z_][a-zA-Z_0-9]*/
+
+ def one [ id1* ]
+end
+
+OneParser: accum<one>
+
+lex
+ ignore /[ \t]+/
+ token id2 /[a-zA-Z_][a-zA-Z_0-9]*/
+ literal `! `;
+ token NL /'\n'/
+
+ def A1 []
+ { print( "A1\n" ) }
+
+ def A2 []
+ { print( "A2\n" ) }
+
+ def item2
+ [id2]
+ {
+ send OneParser [' extra ']
+ send OneParser [$r1]
+ }
+
+ def two
+ [A1 item2* `! NL]
+ | [A2 item2* `; NL]
+end
+
+end # accum_bt
+
+AccumBt: accum_bt = cons accum_bt[]
+AccumBt.OneParser = cons parser<accum_bt::one>[]
+
+parse Two: accum_bt::two(AccumBt)[ stdin ]
+
+send AccumBt.OneParser ['\n'] eos
+
+print( ^Two )
+print( ^( AccumBt.OneParser.tree ) '\n' )
+##### IN #####
+a b c d e ;
+##### EXP #####
+A1
+A2
+a b c d e ;
+extra a extra b extra c extra d extra e
diff --git a/test/accumbt3.lm b/test/accumbt3.lm
new file mode 100644
index 0000000..3753282
--- /dev/null
+++ b/test/accumbt3.lm
@@ -0,0 +1,106 @@
+##### LM #####
+#
+# Tokens
+#
+
+
+# Any single character can be a literal
+lex
+ # Ignore whitespace.
+ ignore /[ \t\n\r\v]+/
+
+ # Open and close id
+ token id /[a-zA-Z_][a-zA-Z0-9_]*/
+
+ token open_paren /'('/
+ {
+ parse_stop NC: nested_comment[ input ]
+ print( 'discarding: ' NC '\n' )
+ }
+end
+
+#
+# Token translation
+#
+
+lex
+ literal `( `)
+ token nc_data /[^()]+/
+end
+
+def nc_item
+ [nc_data]
+| [nested_comment]
+
+def nested_comment
+ [`( nc_item* `)]
+
+def nested [id*]
+
+#
+# Accumulator.
+#
+context accum_bt
+
+NestedParser: accum<nested>
+
+lex
+ ignore /[ \t]+/
+ token word /[a-zA-Z0-9/*+_\-]+/
+ token stuff /[a-zA-Z0-9()/*+_\- ]+/
+ literal `! `;
+ token NL /'\n'/
+
+end
+
+def A1 []
+ { print( "A1\n" ) }
+
+def A2 []
+ { print( "A2\n" ) }
+
+def item
+ [word]
+ {
+ send NestedParser [' ']
+ send NestedParser [$r1]
+ send NestedParser [' ']
+ }
+|
+ [stuff]
+ {
+ send NestedParser [' ']
+ send NestedParser [$r1]
+ send NestedParser [' ']
+ }
+
+def two
+ [A1 item* `! NL]
+| [A2 item* `; NL]
+
+end # accum_bt
+
+cons AccumBt: accum_bt[]
+AccumBt.NestedParser = cons parser<nested>[]
+
+parse Two: accum_bt::two(AccumBt)[ stdin ]
+
+send AccumBt.NestedParser [] eos
+Nested: nested = AccumBt.NestedParser.tree
+
+print( '\n------------\n' )
+print( ^Nested '\n' )
+print( ^Two '\n' )
+
+##### IN #####
+hello there ( (this is a nested comment /*sdf asd_++_stuff) ) and this is not ;
+##### EXP #####
+A1
+discarding: ( (this is a nested comment /*sdf asd_++_stuff) )
+A2
+discarding: ( (this is a nested comment /*sdf asd_++_stuff) )
+
+------------
+hello there and this is not
+hello there ( (this is a nested comment /*sdf asd_++_stuff) ) and this is not ;
+
diff --git a/test/argv1.lm b/test/argv1.lm
new file mode 100644
index 0000000..74086e7
--- /dev/null
+++ b/test/argv1.lm
@@ -0,0 +1,8 @@
+##### LM #####
+
+print_xml( argv )
+print( '\n' )
+##### ARGS #####
+a b c 1 2 3
+##### EXP #####
+<__list0><str>./working/argv1</str><str>a</str><str>b</str><str>c</str><str>1</str><str>2</str><str>3</str></__list0>
diff --git a/test/argv2.lm b/test/argv2.lm
new file mode 100644
index 0000000..5c84564
--- /dev/null
+++ b/test/argv2.lm
@@ -0,0 +1,91 @@
+##### LM #####
+lex
+ literal `-
+ token file /^('-'|0)(^0)*/
+end
+
+lex
+ token single /[qvh]/
+ token with_opt /[oi]/
+ token dash /'-'/
+
+ literal `help `verbose `input `=
+end
+
+def long
+ [`help]
+| [`verbose]
+
+def long_with_opt
+ [`input]
+
+def long_eqals
+ [`=]
+| [zero]
+
+token word /(^0)+/
+token zero /0/
+
+def item
+ [`- single* zero]
+| [`- with_opt zero? word zero]
+| [`- dash long zero]
+| [`- dash long_with_opt long_eqals word zero]
+| [file zero]
+
+def args
+ [word zero item*]
+
+# The argument parser. Using an accumulator so we can send nulls after each
+# arg.
+cons ArgParser: parser<args>[]
+
+# Parse the args and extract the result into Args.
+ArgV: list<str> = argv
+for A: str in ArgV
+ send ArgParser [A '\0']
+Args: args = ArgParser()
+
+# Process the args.
+for Item: item in Args {
+ if match Item
+ [`- SL: single* zero]
+ {
+ for S: single in SL
+ print( "single: [$S]\n" )
+ }
+ elsif match Item
+ [`- W: with_opt zero? Opt: word zero]
+ {
+ print( "with opt: [$W] -> [$Opt]\n" )
+ }
+ elsif match Item
+ [`- dash L: long zero]
+ {
+ print("long: [$L]\n" )
+ }
+ elsif match Item
+ [`- dash LO: long_with_opt long_eqals LongOpt: word zero]
+ {
+ print("long: [$LO] -> [$LongOpt]\n" )
+ }
+ elsif match Item
+ [F: file zero]
+ {
+ print("file: [$F]\n" )
+ }
+}
+
+##### ARGS #####
+-qv -h -o output -iinput file --input=foo --input bar --help --verbose
+##### EXP #####
+single: q
+single: v
+single: h
+with opt: o -> output
+with opt: i -> input
+file: file
+long: input -> foo
+long: input -> bar
+long: help
+long: verbose
diff --git a/test/backtrack1.lm b/test/backtrack1.lm
new file mode 100644
index 0000000..76f3705
--- /dev/null
+++ b/test/backtrack1.lm
@@ -0,0 +1,29 @@
+##### LM #####
+# Token names.
+
+lex
+ literal `+ `*
+ token number /[0-9]+/
+ ignore ws / [ \t\n]+ /
+end
+
+def F
+ [number `+]
+| [number]
+| [F `* number]
+
+def E
+ [E `+ F]
+| [F]
+
+def start
+ [E]
+
+parse S: start[ stdin ]
+R: start = match S ~ 9 + 9
+print_xml( R )
+print( '\n' )
+##### IN #####
+9 + 9
+##### EXP #####
+<start><E><E><F><number>9</number></F></E><_literal_0001>+</_literal_0001><F><number>9</number></F></E></start>
diff --git a/test/backtrack2.lm b/test/backtrack2.lm
new file mode 100644
index 0000000..fa3cdfc
--- /dev/null
+++ b/test/backtrack2.lm
@@ -0,0 +1,29 @@
+##### LM #####
+
+# Token names.
+lex
+ token id /[a-z]+/
+ ignore ws /[ \t\n]+/
+end
+
+token bang1 /'!'/
+token bang2 /'!'/
+
+def one [bang1 id id id]
+
+def two [bang2 id id id id]
+
+def prods
+ [one]
+| [two]
+
+def start
+ [prods]
+
+parse S: start[ stdin ]
+print_xml( match S "!aa bb cc dd" )
+print( '\n' )
+##### IN #####
+!aa bb cc dd
+##### EXP #####
+<start><prods><two><bang2>!</bang2><id>aa</id><id>bb</id><id>cc</id><id>dd</id></two></prods></start>
diff --git a/test/backtrack3.lm b/test/backtrack3.lm
new file mode 100644
index 0000000..8c6dfc3
--- /dev/null
+++ b/test/backtrack3.lm
@@ -0,0 +1,34 @@
+##### LM #####
+
+# Token names.
+lex
+ token number /[0-9]+/
+ token id /[a-z]+/
+ token string /'"' [^"]* '"'/
+ ignore ws / [ \t\n]+ /
+end
+
+def prefix [id]
+
+def choice1
+ [number number]
+| [number]
+
+def choice2
+ [string id]
+| [number number]
+| [id number]
+| [number]
+
+def start
+ [prefix choice1 choice2 string id id]
+ {
+ print_xml( match lhs "id 77 88 \"hello\" dude dude\n" )
+ }
+
+parse start[stdin]
+print( '\n' )
+##### IN #####
+id 77 88 "hello" dude dude
+##### EXP #####
+<start><prefix><id>id</id></prefix><choice1><number>77</number></choice1><choice2><number>88</number></choice2><string>"hello"</string><id>dude</id><id>dude</id></start>
diff --git a/test/binary1.in b/test/binary1.in
new file mode 100644
index 0000000..8da7d3d
--- /dev/null
+++ b/test/binary1.in
Binary files differ
diff --git a/test/binary1.lm b/test/binary1.lm
new file mode 100644
index 0000000..7fc8337
--- /dev/null
+++ b/test/binary1.lm
@@ -0,0 +1,1902 @@
+##### LM #####
+
+context binary
+
+# Used for most of the grammar.
+token octet /any/
+
+# Filled in during the parsing of resource records. Determine what RR_UNKNOWN
+# translates to.
+rr_type_value: int
+rr_class_value: int
+
+# Tokens generated from RR_UNKNOWN. Used to pick the kind
+# of resource record to attempt to parse.
+token RR_A // # 1 a host address
+token RR_NS // # 2 an authoritative name server
+token RR_MD // # 3 a mail destination (Obsolete - use MX)
+token RR_MF // # 4 a mail forwarder (Obsolete - use MX)
+token RR_CNAME // # 5 the canonical name for an alias
+token RR_SOA // # 6 marks the start of a zone of authority
+token RR_MB // # 7 a mailbox domain name (EXPERIMENTAL)
+token RR_MG // # 8 a mail group member (EXPERIMENTAL)
+token RR_MR // # 9 a mail rename domain name (EXPERIMENTAL)
+token RR_NULL // # 10 a null RR (EXPERIMENTAL)
+token RR_WKS // # 11 a well known service description
+token RR_PTR // # 12 a domain name pointer
+token RR_HINFO // # 13 host information
+token RR_MINFO // # 14 mailbox or mail list information
+token RR_MX // # 15 mail exchange
+token RR_TXT // # 16 text strings
+
+token RR_UNKNOWN
+ /''/
+ {
+ id: int = typeid<RR_UNKNOWN>
+ if rr_type_value == 1
+ id = typeid<RR_A>
+ elsif rr_type_value == 2
+ id = typeid<RR_NS>
+ elsif rr_type_value == 5
+ id = typeid<RR_CNAME>
+ elsif rr_type_value == 12
+ id = typeid<RR_PTR>
+ elsif rr_type_value == 15
+ id = typeid<RR_MX>
+ elsif rr_type_value == 16
+ id = typeid<RR_TXT>
+
+ input.push( make_token( id '' ) )
+ }
+
+# Convert two octets in network order into an unsigned 16 bit value.
+int network_uord16( o1: octet o2: octet )
+{
+ return o1.data.uord8() * 256 + o2.data.uord8()
+}
+
+
+def message
+ [header questions answers authorities additionals]
+
+question_count: int
+answer_count: int
+authority_count: int
+additional_count: int
+
+# Message Header
+#
+# 1 1 1 1 1 1
+# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | ID |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# |QR| Opcode |AA|TC|RD|RA| Z | RCODE |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | QDCOUNT |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | ANCOUNT |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | NSCOUNT |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | ARCOUNT |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+def header
+ [header_id header_fields count count count count]
+ {
+ question_count = r3.count
+ answer_count = r4.count
+ authority_count = r5.count
+ additional_count = r6.count
+ }
+
+def header_id
+ [octet octet]
+
+def header_fields
+ [octet octet]
+
+def count
+ count: int
+ [octet octet]
+ {
+ lhs.count = network_uord16( r1 r2 )
+ }
+
+#
+# Counting Primitives
+#
+# Uses a stack of lengths. Using a stack allows for counted lists to be
+# nested. As the list is consumed it brings the count down to zero. To use
+# it, push a new count value to the list and include it in a
+# right-recursive list like so:
+#
+# def LIST
+# [count_inc ITEM LIST]
+# [count_end]
+# end
+#
+
+CL: list<int>
+
+int start_list( count: int )
+{
+ CL.push( count )
+}
+
+def count_inc
+ []
+ {
+ if CL.top == 0 {
+ reject
+ } else {
+ CL.top = CL.top - 1
+ }
+ }
+
+def count_end
+ []
+ {
+ if CL.top != 0 {
+ reject
+ } else {
+ CL.pop()
+ }
+ }
+
+#
+# Octet List
+#
+
+# General octet list. Length must be set to use this.
+def octet_list
+ [count_inc octet octet_list]
+| [count_end]
+
+
+#
+# Names
+#
+
+def name
+ [name_part* name_end]
+
+# Name part lists are terminated by a zero length or a pointer.
+def name_end
+ # Zero length ending
+ [octet]
+ {
+ val: int = r1.data.uord8()
+ if val != 0 {
+ reject
+ }
+ }
+
+ # Pointer ending
+ # +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ # | 1 1| OFFSET |
+ # +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+| [octet octet]
+ {
+ val: int = r1.data.uord8()
+ if val < 64 {
+ reject
+ }
+ }
+
+#
+# Get some number of bytes.
+#
+
+# How many to get
+nbytes: int
+
+# We use this token to eliminate the lookahead that would be needed to
+# cause a reduce of part_len. This forces whatever comes before nbytes to
+# be reduced before nbytes_data token is fetched from the scanner. We need
+# this because nbytes_data depends on the nbytes in the context and we need
+# to ensure that it is set.
+token nb_empty /''/
+
+# Fetch nbytes bytes.
+token nbytes_data
+ /''/
+ {
+ input.push( make_token( typeid<nbytes_data> input.pull(nbytes) ) )
+ }
+
+def nbytes
+ [nb_empty nbytes_data]
+
+def name_part
+ [part_len nbytes]
+
+
+def part_len
+ [octet]
+ {
+ # A name part list is terminated either by a zero length or a pointer,
+ # which must have the two high bits set.
+ count: int = r1.data.uord8()
+ if count == 0 || count >= 64 {
+ reject
+ } else {
+ # Set the number of bytes to get for the name part.
+ nbytes = count
+ }
+ }
+
+#
+# Resource Records
+#
+
+# 1 1 1 1 1 1
+# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | |
+# / /
+# / NAME /
+# | |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | TYPE |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | CLASS |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | TTL |
+# | |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | RDLENGTH |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--|
+# / RDATA /
+# / /
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+
+def resource_record
+ [name rr_type rr_class ttl rdlength rdata]
+
+def rr_type
+ [octet octet]
+ {
+ rr_type_value = network_uord16( r1 r2 )
+ }
+
+def rr_class
+ value: int
+ [octet octet]
+ {
+ rr_class_value = network_uord16( r1 r2 )
+ }
+
+def ttl
+ [octet octet octet octet]
+
+token rdata_bytes
+ /''/
+ {
+ input.push( make_token( typeid<rdata_bytes> input.pull(rdata_length) ) )
+ }
+
+def rdlength
+ [octet octet]
+ {
+ rdata_length = network_uord16( r1 r2 )
+ }
+
+rdata_length: int
+
+def rdata
+ [RR_UNKNOWN rdata_bytes]
+| [RR_A address]
+| [RR_NS name]
+| [RR_CNAME name]
+| [RR_PTR name]
+| [RR_MX octet octet name]
+| [RR_TXT rdata_bytes]
+
+
+#
+# Address
+#
+def address [octet octet octet octet]
+
+#
+# List of Questions
+#
+
+def questions
+ [load_question_count question_list]
+
+def load_question_count
+ []
+ {
+ start_list( question_count )
+ }
+
+def question_list
+ [count_inc question question_list]
+| [count_end]
+
+#
+# Question
+#
+
+# 1 1 1 1 1 1
+# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | |
+# / QNAME /
+# / /
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | QTYPE |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+# | QCLASS |
+# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+
+def question
+ [name qtype qclass]
+
+def qtype
+ [octet octet]
+
+def qclass
+ [octet octet]
+
+#
+# List of Answers
+#
+
+def answers
+ [load_answer_count answer_list]
+
+def load_answer_count
+ []
+ {
+ start_list( answer_count )
+ }
+
+def answer_list
+ [count_inc answer answer_list]
+| [count_end]
+
+#
+# Answer
+#
+
+def answer
+ [resource_record]
+
+#
+# List of Authorities
+#
+
+def authorities
+ [load_authority_count authority_list]
+
+def load_authority_count
+ []
+ {
+ start_list( authority_count )
+ }
+
+def authority_list
+ [count_inc authority authority_list]
+| [count_end]
+
+#
+# Authority
+#
+
+def authority
+ [resource_record]
+
+#
+# List of Additionals
+#
+
+def additionals
+ [load_additional_count additional_list]
+
+def load_additional_count
+ []
+ {
+ start_list( additional_count )
+ }
+
+def additional_list
+ [count_inc additional additional_list]
+| [count_end]
+
+#
+# Additional
+#
+
+def additional
+ [resource_record]
+
+
+def start
+ [message*]
+
+#
+# Grammar End.
+#
+
+int print_RR_UNKNOWN( s: start )
+{
+ for I:rdata in s {
+ if match I [u:RR_UNKNOWN rdata_bytes] {
+ print( 'UNKNOWN TYPE\n' )
+ }
+ }
+}
+
+int print_RR_A( s: start )
+{
+ for I:rdata in s {
+ if match I [RR_A o1:octet o2:octet o3:octet o4:octet] {
+ print( 'RR_A: ' o1.data.uord8() '.' o2.data.uord8() '.'
+ o3.data.uord8() '.' o4.data.uord8() '\n' )
+ }
+ }
+}
+
+int print_name( n: name m: map<int name> )
+{
+ for P: name_part in n {
+ match P [part_len D:nbytes]
+ print( D '.' )
+ }
+
+ for E:name_end in n {
+ if match E [o1:octet o2:octet] {
+ val: int = (o1.data.uord8() - 192) * 256 + o2.data.uord8()
+ print( '[' val ']' )
+ nameInMap: name = m.find( val )
+ print_name( nameInMap m )
+ }
+ }
+}
+
+int print_all_names( s: start )
+{
+ for M: message in s {
+ construct m: map<int name> []
+
+ O: octet = octet in M
+
+ for N: name in M {
+ match N [name_part* E:name_end]
+
+ for NP: name_part* in N {
+ if match NP [L: octet nbytes name_part*] {
+ messageOffset: int = L.pos - O.pos
+ construct n: name [NP E]
+ m.insert( messageOffset n )
+ }
+ }
+ }
+
+ for I: name in M {
+ print_name( I m )
+ print( '\n' )
+ }
+ }
+}
+
+end # binary
+
+cons Binary: binary[]
+Binary.CL = cons list<int> []
+
+parse S: binary::start(Binary) [ stdin ]
+print_all_names( S )
+print( '*** SUCCESS ***\n' )
+##### EXP #####
+www.google.ca.
+www.google.ca.
+[12]www.google.ca.
+www.google.com.
+[43]www.google.com.
+www.l.[47]google.com.
+[71]www.l.[47]google.com.
+[75]l.[47]google.com.
+a.[75]l.[47]google.com.
+[75]l.[47]google.com.
+d.[75]l.[47]google.com.
+[75]l.[47]google.com.
+e.[75]l.[47]google.com.
+[75]l.[47]google.com.
+f.[75]l.[47]google.com.
+[75]l.[47]google.com.
+g.[75]l.[47]google.com.
+[75]l.[47]google.com.
+b.[75]l.[47]google.com.
+[107]a.[75]l.[47]google.com.
+[123]d.[75]l.[47]google.com.
+[139]e.[75]l.[47]google.com.
+[155]f.[75]l.[47]google.com.
+[171]g.[75]l.[47]google.com.
+[187]b.[75]l.[47]google.com.
+clients1.google.ca.
+clients1.google.ca.
+[12]clients1.google.ca.
+clients.l.google.com.
+[48]clients.l.google.com.
+[56]l.google.com.
+e.[56]l.google.com.
+[56]l.google.com.
+f.[56]l.google.com.
+[56]l.google.com.
+g.[56]l.google.com.
+[56]l.google.com.
+b.[56]l.google.com.
+[56]l.google.com.
+a.[56]l.google.com.
+[56]l.google.com.
+d.[56]l.google.com.
+[98]e.[56]l.google.com.
+[114]f.[56]l.google.com.
+[130]g.[56]l.google.com.
+[146]b.[56]l.google.com.
+[162]a.[56]l.google.com.
+[178]d.[56]l.google.com.
+en-us.fxfeeds.mozilla.com.
+en-us.fxfeeds.mozilla.com.
+[12]en-us.fxfeeds.mozilla.com.
+fxfeeds.mozilla.org.
+[55]fxfeeds.mozilla.org.
+[63]mozilla.org.
+ns1.[63]mozilla.org.
+[63]mozilla.org.
+ns2.[63]mozilla.org.
+[63]mozilla.org.
+ns3.[63]mozilla.org.
+[104]ns1.[63]mozilla.org.
+[122]ns2.[63]mozilla.org.
+[140]ns3.[63]mozilla.org.
+fxfeeds.mozilla.com.
+fxfeeds.mozilla.com.
+[12]fxfeeds.mozilla.com.
+fxfeeds.mozilla.org.
+[49]fxfeeds.mozilla.org.
+[57]mozilla.org.
+ns2.[57]mozilla.org.
+[57]mozilla.org.
+ns3.[57]mozilla.org.
+[57]mozilla.org.
+ns1.[57]mozilla.org.
+[98]ns2.[57]mozilla.org.
+[116]ns3.[57]mozilla.org.
+[134]ns1.[57]mozilla.org.
+newsrss.bbc.co.uk.
+newsrss.bbc.co.uk.
+[12]newsrss.bbc.co.uk.
+newsrss.bbc.net.uk.
+[47]newsrss.bbc.net.uk.
+[55]bbc.net.uk.
+ns0.thdo.bbc.co.[63]uk.
+[55]bbc.net.uk.
+ns0.rbsov.[104]bbc.co.[63]uk.
+[95]ns0.thdo.bbc.co.[63]uk.
+[125]ns0.rbsov.[104]bbc.co.[63]uk.
+news.google.ca.
+news.google.ca.
+[12]news.google.ca.
+news.google.com.
+[44]news.google.com.
+news.l.[49]google.com.
+[73]news.l.[49]google.com.
+[78]l.[49]google.com.
+d.[78]l.[49]google.com.
+[78]l.[49]google.com.
+e.[78]l.[49]google.com.
+[78]l.[49]google.com.
+f.[78]l.[49]google.com.
+[78]l.[49]google.com.
+g.[78]l.[49]google.com.
+[78]l.[49]google.com.
+b.[78]l.[49]google.com.
+[78]l.[49]google.com.
+a.[78]l.[49]google.com.
+[110]d.[78]l.[49]google.com.
+[126]e.[78]l.[49]google.com.
+[142]f.[78]l.[49]google.com.
+[158]g.[78]l.[49]google.com.
+[174]b.[78]l.[49]google.com.
+[190]a.[78]l.[49]google.com.
+nt3.ggpht.com.
+nt3.ggpht.com.
+[12]nt3.ggpht.com.
+news.l.google.com.
+[43]news.l.google.com.
+[48]l.google.com.
+f.[48]l.google.com.
+[48]l.google.com.
+g.[48]l.google.com.
+[48]l.google.com.
+b.[48]l.google.com.
+[48]l.google.com.
+a.[48]l.google.com.
+[48]l.google.com.
+d.[48]l.google.com.
+[48]l.google.com.
+e.[48]l.google.com.
+[90]f.[48]l.google.com.
+[106]g.[48]l.google.com.
+[122]b.[48]l.google.com.
+[138]a.[48]l.google.com.
+[154]d.[48]l.google.com.
+[170]e.[48]l.google.com.
+csi.gstatic.com.
+csi.gstatic.com.
+[12]csi.gstatic.com.
+csi.l.google.com.
+[45]csi.l.google.com.
+[49]l.google.com.
+d.[49]l.google.com.
+[49]l.google.com.
+e.[49]l.google.com.
+[49]l.google.com.
+f.[49]l.google.com.
+[49]l.google.com.
+g.[49]l.google.com.
+[49]l.google.com.
+b.[49]l.google.com.
+[49]l.google.com.
+a.[49]l.google.com.
+[91]d.[49]l.google.com.
+[107]e.[49]l.google.com.
+[123]f.[49]l.google.com.
+[139]g.[49]l.google.com.
+[155]b.[49]l.google.com.
+[171]a.[49]l.google.com.
+www.nytimes.com.
+www.nytimes.com.
+[12]www.nytimes.com.
+nytimes.com.
+nydns1.about.[57]com.
+[49]nytimes.com.
+ns1t.[49]nytimes.com.
+[49]nytimes.com.
+nydns2.[79]about.[57]com.
+[72]nydns1.about.[57]com.
+[99]ns1t.[49]nytimes.com.
+[118]nydns2.[79]about.[57]com.
+graphics8.nytimes.com.
+graphics8.nytimes.com.
+[12]graphics8.nytimes.com.
+graphics478.nytimes.com.edgesuite.net.
+[51]graphics478.nytimes.com.edgesuite.net.
+a1116.x.akamai.[85]net.
+[102]a1116.x.akamai.[85]net.
+[102]a1116.x.akamai.[85]net.
+[108]x.akamai.[85]net.
+n0x.[110]akamai.[85]net.
+[108]x.akamai.[85]net.
+n1x.[110]akamai.[85]net.
+[108]x.akamai.[85]net.
+n6x.[110]akamai.[85]net.
+[108]x.akamai.[85]net.
+n3x.[110]akamai.[85]net.
+[108]x.akamai.[85]net.
+n2x.[110]akamai.[85]net.
+[108]x.akamai.[85]net.
+n7x.[110]akamai.[85]net.
+[108]x.akamai.[85]net.
+n5x.[110]akamai.[85]net.
+[108]x.akamai.[85]net.
+n8x.[110]akamai.[85]net.
+[108]x.akamai.[85]net.
+n4x.[110]akamai.[85]net.
+[163]n0x.[110]akamai.[85]net.
+[181]n1x.[110]akamai.[85]net.
+[199]n6x.[110]akamai.[85]net.
+[217]n3x.[110]akamai.[85]net.
+[235]n2x.[110]akamai.[85]net.
+[253]n7x.[110]akamai.[85]net.
+[271]n5x.[110]akamai.[85]net.
+[289]n8x.[110]akamai.[85]net.
+[307]n4x.[110]akamai.[85]net.
+timespeople.nytimes.com.
+timespeople.nytimes.com.
+[12]timespeople.nytimes.com.
+nytimes.com.
+nydns2.about.[65]com.
+[57]nytimes.com.
+nydns1.[87]about.[65]com.
+[57]nytimes.com.
+ns1t.[57]nytimes.com.
+[80]nydns2.about.[65]com.
+[107]nydns1.[87]about.[65]com.
+[128]ns1t.[57]nytimes.com.
+googleads.g.doubleclick.net.
+googleads.g.doubleclick.net.
+[12]googleads.g.doubleclick.net.
+pagead.l.doubleclick.net.
+[57]pagead.l.doubleclick.net.
+[64]l.doubleclick.net.
+b.l.google.com.
+[64]l.doubleclick.net.
+d.[113]l.google.com.
+[64]l.doubleclick.net.
+e.[113]l.google.com.
+[64]l.doubleclick.net.
+f.[113]l.google.com.
+[64]l.doubleclick.net.
+g.[113]l.google.com.
+[64]l.doubleclick.net.
+a.[113]l.google.com.
+[111]b.l.google.com.
+[139]d.[113]l.google.com.
+[155]e.[113]l.google.com.
+[171]f.[113]l.google.com.
+[187]g.[113]l.google.com.
+[203]a.[113]l.google.com.
+up.nytimes.com.
+up.nytimes.com.
+[12]up.nytimes.com.
+up.about.akadns.net.
+[44]up.about.akadns.net.
+[53]akadns.net.
+eur1.[53]akadns.net.
+[53]akadns.net.
+use3.[53]akadns.net.
+[53]akadns.net.
+use4.[53]akadns.net.
+[53]akadns.net.
+usw2.[53]akadns.net.
+[53]akadns.net.
+za.akadns.org.
+[53]akadns.net.
+zb.[172]akadns.org.
+[53]akadns.net.
+zc.[172]akadns.org.
+[53]akadns.net.
+zd.[172]akadns.org.
+[53]akadns.net.
+asia9.[53]akadns.net.
+[93]eur1.[53]akadns.net.
+[112]use3.[53]akadns.net.
+[131]use4.[53]akadns.net.
+[150]usw2.[53]akadns.net.
+[169]za.akadns.org.
+[196]zb.[172]akadns.org.
+[213]zc.[172]akadns.org.
+[230]zd.[172]akadns.org.
+[247]asia9.[53]akadns.net.
+pix04.revsci.net.
+pix04.revsci.net.
+[12]pix04.revsci.net.
+revsci.net.
+ns2.p16.dynect.[57]net.
+[50]revsci.net.
+ns3.[76]p16.dynect.[57]net.
+[50]revsci.net.
+ns1.[76]p16.dynect.[57]net.
+[50]revsci.net.
+ns4.[76]p16.dynect.[57]net.
+[72]ns2.p16.dynect.[57]net.
+[101]ns3.[76]p16.dynect.[57]net.
+[119]ns1.[76]p16.dynect.[57]net.
+[137]ns4.[76]p16.dynect.[57]net.
+wt.o.nytimes.com.
+wt.o.nytimes.com.
+[12]wt.o.nytimes.com.
+nytimes.webtrends.akadns.net.
+[46]nytimes.webtrends.akadns.net.
+[64]akadns.net.
+use3.[64]akadns.net.
+[64]akadns.net.
+use4.[64]akadns.net.
+[64]akadns.net.
+usw2.[64]akadns.net.
+[64]akadns.net.
+za.akadns.org.
+[64]akadns.net.
+zb.[164]akadns.org.
+[64]akadns.net.
+zc.[164]akadns.org.
+[64]akadns.net.
+zd.[164]akadns.org.
+[64]akadns.net.
+asia9.[64]akadns.net.
+[64]akadns.net.
+eur1.[64]akadns.net.
+[104]use3.[64]akadns.net.
+[123]use4.[64]akadns.net.
+[142]usw2.[64]akadns.net.
+[161]za.akadns.org.
+[188]zb.[164]akadns.org.
+[205]zc.[164]akadns.org.
+[222]zd.[164]akadns.org.
+[239]asia9.[64]akadns.net.
+[259]eur1.[64]akadns.net.
+te.nytimes.com.
+ar.voicefive.com.
+te.nytimes.com.
+[12]te.nytimes.com.
+nytd.te.tacoda.net.
+[44]nytd.te.tacoda.net.
+te.tacoda.akadns.[59]net.
+[76]te.tacoda.akadns.[59]net.
+[86]akadns.[59]net.
+use4.[86]akadns.[59]net.
+[86]akadns.[59]net.
+usw2.[86]akadns.[59]net.
+[86]akadns.[59]net.
+za.akadns.org.
+[86]akadns.[59]net.
+zb.[164]akadns.org.
+[86]akadns.[59]net.
+zc.[164]akadns.org.
+[86]akadns.[59]net.
+zd.[164]akadns.org.
+[86]akadns.[59]net.
+asia9.[86]akadns.[59]net.
+[86]akadns.[59]net.
+eur1.[86]akadns.[59]net.
+[86]akadns.[59]net.
+use3.[86]akadns.[59]net.
+[123]use4.[86]akadns.[59]net.
+[142]usw2.[86]akadns.[59]net.
+[161]za.akadns.org.
+[188]zb.[164]akadns.org.
+[205]zc.[164]akadns.org.
+[222]zd.[164]akadns.org.
+[239]asia9.[86]akadns.[59]net.
+[259]eur1.[86]akadns.[59]net.
+[278]use3.[86]akadns.[59]net.
+ar.voicefive.com.
+[12]ar.voicefive.com.
+ar.gta.voicefive.com.
+[46]ar.gta.voicefive.com.
+[49]gta.voicefive.com.
+gta02.ord.[53]voicefive.com.
+[49]gta.voicefive.com.
+gta01.iad.[53]voicefive.com.
+[49]gta.voicefive.com.
+gta01.[102]ord.[53]voicefive.com.
+[49]gta.voicefive.com.
+gta02.[126]iad.[53]voicefive.com.
+[96]gta02.ord.[53]voicefive.com.
+[120]gta01.iad.[53]voicefive.com.
+[144]gta01.[102]ord.[53]voicefive.com.
+[164]gta02.[126]iad.[53]voicefive.com.
+www.google.com.
+www.google.com.
+[12]www.google.com.
+www.l.google.com.
+[44]www.l.google.com.
+[48]l.google.com.
+d.[48]l.google.com.
+[48]l.google.com.
+e.[48]l.google.com.
+[48]l.google.com.
+f.[48]l.google.com.
+[48]l.google.com.
+g.[48]l.google.com.
+[48]l.google.com.
+b.[48]l.google.com.
+[48]l.google.com.
+a.[48]l.google.com.
+[90]d.[48]l.google.com.
+[106]e.[48]l.google.com.
+[122]f.[48]l.google.com.
+[138]g.[48]l.google.com.
+[154]b.[48]l.google.com.
+[170]a.[48]l.google.com.
+www.google-analytics.com.
+www.google-analytics.com.
+[12]www.google-analytics.com.
+www-google-analytics.l.google.com.
+[54]www-google-analytics.l.google.com.
+[75]l.google.com.
+f.[75]l.google.com.
+[75]l.google.com.
+g.[75]l.google.com.
+[75]l.google.com.
+b.[75]l.google.com.
+[75]l.google.com.
+a.[75]l.google.com.
+[75]l.google.com.
+d.[75]l.google.com.
+[75]l.google.com.
+e.[75]l.google.com.
+[117]f.[75]l.google.com.
+[133]g.[75]l.google.com.
+[149]b.[75]l.google.com.
+[165]a.[75]l.google.com.
+[181]d.[75]l.google.com.
+[197]e.[75]l.google.com.
+pagead2.googlesyndication.com.
+pagead2.googlesyndication.com.
+[12]pagead2.googlesyndication.com.
+pagead.l.google.com.
+[59]pagead.l.google.com.
+[66]l.google.com.
+g.[66]l.google.com.
+[66]l.google.com.
+b.[66]l.google.com.
+[66]l.google.com.
+a.[66]l.google.com.
+[66]l.google.com.
+d.[66]l.google.com.
+[66]l.google.com.
+e.[66]l.google.com.
+[66]l.google.com.
+f.[66]l.google.com.
+[108]g.[66]l.google.com.
+[124]b.[66]l.google.com.
+[140]a.[66]l.google.com.
+[156]d.[66]l.google.com.
+[172]e.[66]l.google.com.
+[188]f.[66]l.google.com.
+maps.google.com.
+maps.google.com.
+[12]maps.google.com.
+maps.l.google.com.
+[45]maps.l.google.com.
+[50]l.google.com.
+b.[50]l.google.com.
+[50]l.google.com.
+a.[50]l.google.com.
+[50]l.google.com.
+d.[50]l.google.com.
+[50]l.google.com.
+e.[50]l.google.com.
+[50]l.google.com.
+f.[50]l.google.com.
+[50]l.google.com.
+g.[50]l.google.com.
+[92]b.[50]l.google.com.
+[108]a.[50]l.google.com.
+[124]d.[50]l.google.com.
+[140]e.[50]l.google.com.
+[156]f.[50]l.google.com.
+[172]g.[50]l.google.com.
+maps.gstatic.com.
+maps.gstatic.com.
+[12]maps.gstatic.com.
+www2.l.google.com.
+[46]www2.l.google.com.
+[51]l.google.com.
+a.[51]l.google.com.
+[51]l.google.com.
+d.[51]l.google.com.
+[51]l.google.com.
+e.[51]l.google.com.
+[51]l.google.com.
+f.[51]l.google.com.
+[51]l.google.com.
+g.[51]l.google.com.
+[51]l.google.com.
+b.[51]l.google.com.
+[93]a.[51]l.google.com.
+[109]d.[51]l.google.com.
+[125]e.[51]l.google.com.
+[141]f.[51]l.google.com.
+[157]g.[51]l.google.com.
+[173]b.[51]l.google.com.
+www.calgaryherald.com.
+www.calgaryherald.com.
+[12]www.calgaryherald.com.
+calgaryherald.com.
+ns1.canwest.[69]com.
+[55]calgaryherald.com.
+ns2.[88]canwest.[69]com.
+[84]ns1.canwest.[69]com.
+[110]ns2.[88]canwest.[69]com.
+a123.g.akamai.net.
+a123.g.akamai.net.
+[12]a123.g.akamai.net.
+[12]a123.g.akamai.net.
+members.canada.com.
+members.canada.com.
+[12]members.canada.com.
+canada.com.
+ns2.canwest.[59]com.
+[52]canada.com.
+ns1.[78]canwest.[59]com.
+[74]ns2.canwest.[59]com.
+[100]ns1.[78]canwest.[59]com.
+www.canada.com.
+www.canada.com.
+[12]www.canada.com.
+canada.com.
+ns1.canwest.[55]com.
+[48]canada.com.
+ns2.[74]canwest.[55]com.
+[70]ns1.canwest.[55]com.
+[96]ns2.[74]canwest.[55]com.
+s9.addthis.com.
+s9.addthis.com.
+[12]s9.addthis.com.
+wildcard.addthis.com.edgekey.net.
+[44]wildcard.addthis.com.edgekey.net.
+e2943.c.akamaiedge.[73]net.
+[90]e2943.c.akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n7c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n5c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n8c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n0c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n6c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n1c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n4c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n3c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n2c.[98]akamaiedge.[73]net.
+[139]n7c.[98]akamaiedge.[73]net.
+[157]n5c.[98]akamaiedge.[73]net.
+[175]n8c.[98]akamaiedge.[73]net.
+[193]n0c.[98]akamaiedge.[73]net.
+[211]n6c.[98]akamaiedge.[73]net.
+[229]n1c.[98]akamaiedge.[73]net.
+[247]n4c.[98]akamaiedge.[73]net.
+[265]n3c.[98]akamaiedge.[73]net.
+[283]n2c.[98]akamaiedge.[73]net.
+communities.canada.com.
+communities.canada.com.
+[12]communities.canada.com.
+canada.com.
+ns1.canwest.[63]com.
+[56]canada.com.
+ns2.[82]canwest.[63]com.
+[78]ns1.canwest.[63]com.
+[104]ns2.[82]canwest.[63]com.
+canwestglobal.112.2o7.net.
+beacon.securestudies.com.
+beacon.securestudies.com.
+[12]beacon.securestudies.com.
+beacon.gta.securestudies.com.
+[54]beacon.gta.securestudies.com.
+[61]gta.securestudies.com.
+gta02.iad.[65]securestudies.com.
+[61]gta.securestudies.com.
+gta02.ord.[65]securestudies.com.
+[61]gta.securestudies.com.
+gta01.[118]iad.[65]securestudies.com.
+[61]gta.securestudies.com.
+gta01.[142]ord.[65]securestudies.com.
+[112]gta02.iad.[65]securestudies.com.
+[136]gta02.ord.[65]securestudies.com.
+[160]gta01.[118]iad.[65]securestudies.com.
+[180]gta01.[142]ord.[65]securestudies.com.
+canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+2o7.net.
+ns1.dal.omniture.com.
+[155]2o7.net.
+ns1.sj1.[182]omniture.com.
+[155]2o7.net.
+ns1.sj2.[182]omniture.com.
+[174]ns1.dal.omniture.com.
+[208]ns1.sj1.[182]omniture.com.
+[230]ns1.sj2.[182]omniture.com.
+s7.addthis.com.
+s7.addthis.com.
+[12]s7.addthis.com.
+wildcard.addthis.com.edgekey.net.
+[44]wildcard.addthis.com.edgekey.net.
+e2943.c.akamaiedge.[73]net.
+[90]e2943.c.akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n5c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n8c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n0c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n6c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n1c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n4c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n3c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n2c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n7c.[98]akamaiedge.[73]net.
+[139]n5c.[98]akamaiedge.[73]net.
+[157]n8c.[98]akamaiedge.[73]net.
+[175]n0c.[98]akamaiedge.[73]net.
+[193]n6c.[98]akamaiedge.[73]net.
+[211]n1c.[98]akamaiedge.[73]net.
+[229]n4c.[98]akamaiedge.[73]net.
+[247]n3c.[98]akamaiedge.[73]net.
+[265]n2c.[98]akamaiedge.[73]net.
+[283]n7c.[98]akamaiedge.[73]net.
+csi.gstatic.com.
+csi.gstatic.com.
+[12]csi.gstatic.com.
+csi.l.google.com.
+[45]csi.l.google.com.
+[49]l.google.com.
+d.[49]l.google.com.
+[49]l.google.com.
+e.[49]l.google.com.
+[49]l.google.com.
+f.[49]l.google.com.
+[49]l.google.com.
+g.[49]l.google.com.
+[49]l.google.com.
+b.[49]l.google.com.
+[49]l.google.com.
+a.[49]l.google.com.
+[91]d.[49]l.google.com.
+[107]e.[49]l.google.com.
+[123]f.[49]l.google.com.
+[139]g.[49]l.google.com.
+[155]b.[49]l.google.com.
+[171]a.[49]l.google.com.
+www.thestar.com.
+www.thestar.com.
+[12]www.thestar.com.
+[16]thestar.com.
+ns1.[16]thestar.com.
+[16]thestar.com.
+ns2.[16]thestar.com.
+[61]ns1.[16]thestar.com.
+[79]ns2.[16]thestar.com.
+beacon.scorecardresearch.com.
+beacon.scorecardresearch.com.
+[12]beacon.scorecardresearch.com.
+beacon.gta.scorecardresearch.com.
+[58]beacon.gta.scorecardresearch.com.
+[65]gta.scorecardresearch.com.
+gta01.iad.[69]scorecardresearch.com.
+[65]gta.scorecardresearch.com.
+gta02.ord.[69]scorecardresearch.com.
+[65]gta.scorecardresearch.com.
+gta01.[150]ord.[69]scorecardresearch.com.
+[65]gta.scorecardresearch.com.
+gta02.[126]iad.[69]scorecardresearch.com.
+[120]gta01.iad.[69]scorecardresearch.com.
+[144]gta02.ord.[69]scorecardresearch.com.
+[168]gta01.[150]ord.[69]scorecardresearch.com.
+[188]gta02.[126]iad.[69]scorecardresearch.com.
+media.thestar.topscms.com.
+media.thestar.topscms.com.
+[12]media.thestar.topscms.com.
+media.thestar.topscms.com.edgesuite.net.
+[55]media.thestar.topscms.com.edgesuite.net.
+a1520.g.akamai.[91]net.
+[108]a1520.g.akamai.[91]net.
+[108]a1520.g.akamai.[91]net.
+[114]g.akamai.[91]net.
+n0g.[116]akamai.[91]net.
+[114]g.akamai.[91]net.
+n4g.[116]akamai.[91]net.
+[114]g.akamai.[91]net.
+n2g.[116]akamai.[91]net.
+[114]g.akamai.[91]net.
+n5g.[116]akamai.[91]net.
+[114]g.akamai.[91]net.
+n7g.[116]akamai.[91]net.
+[114]g.akamai.[91]net.
+n6g.[116]akamai.[91]net.
+[114]g.akamai.[91]net.
+n8g.[116]akamai.[91]net.
+[114]g.akamai.[91]net.
+n1g.[116]akamai.[91]net.
+[114]g.akamai.[91]net.
+n3g.[116]akamai.[91]net.
+[169]n0g.[116]akamai.[91]net.
+[187]n4g.[116]akamai.[91]net.
+[205]n2g.[116]akamai.[91]net.
+[223]n5g.[116]akamai.[91]net.
+[241]n7g.[116]akamai.[91]net.
+[259]n6g.[116]akamai.[91]net.
+[277]n8g.[116]akamai.[91]net.
+[295]n1g.[116]akamai.[91]net.
+[313]n3g.[116]akamai.[91]net.
+www.addthis.com.
+www.addthis.com.
+[12]www.addthis.com.
+vp-www.addthis.com.
+[45]vp-www.addthis.com.
+[52]addthis.com.
+eur2.akam.net.
+[52]addthis.com.
+usc1.[98]akam.net.
+[52]addthis.com.
+usc2.[98]akam.net.
+[52]addthis.com.
+usw1.[98]akam.net.
+[52]addthis.com.
+usw6.[98]akam.net.
+[52]addthis.com.
+asia3.[98]akam.net.
+[52]addthis.com.
+ns1-33.[98]akam.net.
+[52]addthis.com.
+ns1-43.[98]akam.net.
+[93]eur2.akam.net.
+[120]usc1.[98]akam.net.
+[139]usc2.[98]akam.net.
+[158]usw1.[98]akam.net.
+[177]usw6.[98]akam.net.
+[196]asia3.[98]akam.net.
+[216]ns1-33.[98]akam.net.
+[237]ns1-43.[98]akam.net.
+n.thestar.com.
+n.thestar.com.
+[12]n.thestar.com.
+thestar.com.122.2o7.net.
+[43]thestar.com.122.2o7.net.
+[43]thestar.com.122.2o7.net.
+[43]thestar.com.122.2o7.net.
+[43]thestar.com.122.2o7.net.
+[43]thestar.com.122.2o7.net.
+[43]thestar.com.122.2o7.net.
+[59]2o7.net.
+ns1.sj1.omniture.com.
+[59]2o7.net.
+ns1.sj2.[184]omniture.com.
+[59]2o7.net.
+ns1.dal.[184]omniture.com.
+[176]ns1.sj1.omniture.com.
+[210]ns1.sj2.[184]omniture.com.
+[232]ns1.dal.[184]omniture.com.
+news.therecord.com.
+news.therecord.com.
+[12]news.therecord.com.
+therecord.com.
+ns1.thestar.[62]com.
+[52]therecord.com.
+ns2.[81]thestar.[62]com.
+[77]ns1.thestar.[62]com.
+[103]ns2.[81]thestar.[62]com.
+media.therecord.topscms.com.
+media.therecord.topscms.com.
+[12]media.therecord.topscms.com.
+media.therecord.topscms.com.edgesuite.net.
+[57]media.therecord.topscms.com.edgesuite.net.
+a847.g.akamai.[95]net.
+[112]a847.g.akamai.[95]net.
+[112]a847.g.akamai.[95]net.
+[117]g.akamai.[95]net.
+n2g.[119]akamai.[95]net.
+[117]g.akamai.[95]net.
+n5g.[119]akamai.[95]net.
+[117]g.akamai.[95]net.
+n7g.[119]akamai.[95]net.
+[117]g.akamai.[95]net.
+n6g.[119]akamai.[95]net.
+[117]g.akamai.[95]net.
+n8g.[119]akamai.[95]net.
+[117]g.akamai.[95]net.
+n1g.[119]akamai.[95]net.
+[117]g.akamai.[95]net.
+n3g.[119]akamai.[95]net.
+[117]g.akamai.[95]net.
+n0g.[119]akamai.[95]net.
+[117]g.akamai.[95]net.
+n4g.[119]akamai.[95]net.
+[172]n2g.[119]akamai.[95]net.
+[190]n5g.[119]akamai.[95]net.
+[208]n7g.[119]akamai.[95]net.
+[226]n6g.[119]akamai.[95]net.
+[244]n8g.[119]akamai.[95]net.
+[262]n1g.[119]akamai.[95]net.
+[280]n3g.[119]akamai.[95]net.
+[298]n0g.[119]akamai.[95]net.
+[316]n4g.[119]akamai.[95]net.
+media.therecord.com.
+www.goldbook.ca.
+media.therecord.com.
+[12]media.therecord.com.
+therecord.com.
+ns2.thestar.[63]com.
+[53]therecord.com.
+ns1.[82]thestar.[63]com.
+[78]ns2.thestar.[63]com.
+[104]ns1.[82]thestar.[63]com.
+www.goldbook.ca.
+[12]www.goldbook.ca.
+goldbook.ca.
+[45]goldbook.ca.
+[45]goldbook.ca.
+ns4.everydns.net.
+[45]goldbook.ca.
+ns1.[90]everydns.net.
+[45]goldbook.ca.
+ns2.[90]everydns.net.
+[45]goldbook.ca.
+ns3.[90]everydns.net.
+[86]ns4.everydns.net.
+[116]ns1.[90]everydns.net.
+[134]ns2.[90]everydns.net.
+[152]ns3.[90]everydns.net.
+torstardigital.122.2o7.net.
+torstardigital.122.2o7.net.
+[12]torstardigital.122.2o7.net.
+[12]torstardigital.122.2o7.net.
+[12]torstardigital.122.2o7.net.
+[12]torstardigital.122.2o7.net.
+[12]torstardigital.122.2o7.net.
+[12]torstardigital.122.2o7.net.
+2o7.net.
+ns1.dal.omniture.com.
+[140]2o7.net.
+ns1.sj1.[167]omniture.com.
+[140]2o7.net.
+ns1.sj2.[167]omniture.com.
+[159]ns1.dal.omniture.com.
+[193]ns1.sj1.[167]omniture.com.
+[215]ns1.sj2.[167]omniture.com.
+news.google.ca.
+news.google.ca.
+[12]news.google.ca.
+news.google.com.
+[44]news.google.com.
+news.l.[49]google.com.
+[73]news.l.[49]google.com.
+[78]l.[49]google.com.
+e.[78]l.[49]google.com.
+[78]l.[49]google.com.
+b.[78]l.[49]google.com.
+[78]l.[49]google.com.
+a.[78]l.[49]google.com.
+[78]l.[49]google.com.
+g.[78]l.[49]google.com.
+[78]l.[49]google.com.
+d.[78]l.[49]google.com.
+[78]l.[49]google.com.
+f.[78]l.[49]google.com.
+[110]e.[78]l.[49]google.com.
+[126]b.[78]l.[49]google.com.
+[142]a.[78]l.[49]google.com.
+[158]g.[78]l.[49]google.com.
+[174]d.[78]l.[49]google.com.
+[190]f.[78]l.[49]google.com.
+googleads.g.doubleclick.net.
+googleads.g.doubleclick.net.
+[12]googleads.g.doubleclick.net.
+pagead.l.doubleclick.net.
+[57]pagead.l.doubleclick.net.
+[64]l.doubleclick.net.
+g.l.google.com.
+[64]l.doubleclick.net.
+a.[113]l.google.com.
+[64]l.doubleclick.net.
+b.[113]l.google.com.
+[64]l.doubleclick.net.
+d.[113]l.google.com.
+[64]l.doubleclick.net.
+e.[113]l.google.com.
+[64]l.doubleclick.net.
+f.[113]l.google.com.
+[111]g.l.google.com.
+[139]a.[113]l.google.com.
+[155]b.[113]l.google.com.
+[171]d.[113]l.google.com.
+[187]e.[113]l.google.com.
+[203]f.[113]l.google.com.
+www.montrealgazette.com.
+www.montrealgazette.com.
+[12]www.montrealgazette.com.
+montrealgazette.com.
+ns2.canwest.[73]com.
+[57]montrealgazette.com.
+ns1.[92]canwest.[73]com.
+[88]ns2.canwest.[73]com.
+[114]ns1.[92]canwest.[73]com.
+a123.g.akamai.net.
+a123.g.akamai.net.
+[12]a123.g.akamai.net.
+[12]a123.g.akamai.net.
+members.canada.com.
+members.canada.com.
+[12]members.canada.com.
+canada.com.
+ns2.canwest.[59]com.
+[52]canada.com.
+ns1.[78]canwest.[59]com.
+[74]ns2.canwest.[59]com.
+[100]ns1.[78]canwest.[59]com.
+www.cbc.ca.
+www.cbc.ca.
+[12]www.cbc.ca.
+www.cbc.ca.edgesuite.net.
+[40]www.cbc.ca.edgesuite.net.
+a1849.gc.akamai.[61]net.
+[78]a1849.gc.akamai.[61]net.
+[78]a1849.gc.akamai.[61]net.
+[84]gc.akamai.[61]net.
+n6gc.[87]akamai.[61]net.
+[84]gc.akamai.[61]net.
+n1gc.[87]akamai.[61]net.
+[84]gc.akamai.[61]net.
+n4gc.[87]akamai.[61]net.
+[84]gc.akamai.[61]net.
+n8gc.[87]akamai.[61]net.
+[84]gc.akamai.[61]net.
+n2gc.[87]akamai.[61]net.
+[84]gc.akamai.[61]net.
+n0gc.[87]akamai.[61]net.
+[84]gc.akamai.[61]net.
+n7gc.[87]akamai.[61]net.
+[84]gc.akamai.[61]net.
+n5gc.[87]akamai.[61]net.
+[84]gc.akamai.[61]net.
+n3gc.[87]akamai.[61]net.
+[140]n6gc.[87]akamai.[61]net.
+[159]n1gc.[87]akamai.[61]net.
+[178]n4gc.[87]akamai.[61]net.
+[197]n8gc.[87]akamai.[61]net.
+[216]n2gc.[87]akamai.[61]net.
+[235]n0gc.[87]akamai.[61]net.
+[254]n7gc.[87]akamai.[61]net.
+[273]n5gc.[87]akamai.[61]net.
+[292]n3gc.[87]akamai.[61]net.
+a.cbc.ca.
+a.cbc.ca.
+[12]a.cbc.ca.
+ehg-cbc.hitbox.com.
+[38]ehg-cbc.hitbox.com.
+[46]hitbox.com.
+dns06.omniture.[53]com.
+[46]hitbox.com.
+dns05.[92]omniture.[53]com.
+[46]hitbox.com.
+dns04.[92]omniture.[53]com.
+[46]hitbox.com.
+dns03.[92]omniture.[53]com.
+[46]hitbox.com.
+dns02.[92]omniture.[53]com.
+[46]hitbox.com.
+dns01.[92]omniture.[53]com.
+[86]dns06.omniture.[53]com.
+[115]dns05.[92]omniture.[53]com.
+[135]dns04.[92]omniture.[53]com.
+[155]dns03.[92]omniture.[53]com.
+[175]dns02.[92]omniture.[53]com.
+[195]dns01.[92]omniture.[53]com.
+assets.loomia.com.
+assets.loomia.com.
+[12]assets.loomia.com.
+a.[19]loomia.com.
+[47]a.[19]loomia.com.
+[19]loomia.com.
+ns15.dnsmadeeasy.[26]com.
+[19]loomia.com.
+ns14.[84]dnsmadeeasy.[26]com.
+[19]loomia.com.
+ns12.[84]dnsmadeeasy.[26]com.
+[19]loomia.com.
+ns11.[84]dnsmadeeasy.[26]com.
+[19]loomia.com.
+ns13.[84]dnsmadeeasy.[26]com.
+[19]loomia.com.
+ns10.[84]dnsmadeeasy.[26]com.
+recs-social.loomia.com.
+recs-social.loomia.com.
+[12]recs-social.loomia.com.
+rec-assets.[24]loomia.com.
+[52]rec-assets.[24]loomia.com.
+[24]loomia.com.
+ns14.dnsmadeeasy.[31]com.
+[24]loomia.com.
+ns11.[98]dnsmadeeasy.[31]com.
+[24]loomia.com.
+ns12.[98]dnsmadeeasy.[31]com.
+[24]loomia.com.
+ns15.[98]dnsmadeeasy.[31]com.
+[24]loomia.com.
+ns10.[98]dnsmadeeasy.[31]com.
+[24]loomia.com.
+ns13.[98]dnsmadeeasy.[31]com.
+e1.clearspring.com.
+static-cache.loomia.com.
+static-cache.loomia.com.
+[12]static-cache.loomia.com.
+static-cache.loomia.com.edgesuite.net.
+[53]static-cache.loomia.com.edgesuite.net.
+a298.g.akamai.[87]net.
+[104]a298.g.akamai.[87]net.
+[104]a298.g.akamai.[87]net.
+[109]g.akamai.[87]net.
+n4g.[111]akamai.[87]net.
+[109]g.akamai.[87]net.
+n2g.[111]akamai.[87]net.
+[109]g.akamai.[87]net.
+n5g.[111]akamai.[87]net.
+[109]g.akamai.[87]net.
+n7g.[111]akamai.[87]net.
+[109]g.akamai.[87]net.
+n6g.[111]akamai.[87]net.
+[109]g.akamai.[87]net.
+n8g.[111]akamai.[87]net.
+[109]g.akamai.[87]net.
+n1g.[111]akamai.[87]net.
+[109]g.akamai.[87]net.
+n3g.[111]akamai.[87]net.
+[109]g.akamai.[87]net.
+n0g.[111]akamai.[87]net.
+[164]n4g.[111]akamai.[87]net.
+[182]n2g.[111]akamai.[87]net.
+[200]n5g.[111]akamai.[87]net.
+[218]n7g.[111]akamai.[87]net.
+[236]n6g.[111]akamai.[87]net.
+[254]n8g.[111]akamai.[87]net.
+[272]n1g.[111]akamai.[87]net.
+[290]n3g.[111]akamai.[87]net.
+[308]n0g.[111]akamai.[87]net.
+e1.clearspring.com.
+[12]e1.clearspring.com.
+[15]clearspring.com.
+usc2.akam.net.
+[15]clearspring.com.
+usw1.[69]akam.net.
+[15]clearspring.com.
+usw6.[69]akam.net.
+[15]clearspring.com.
+asia3.[69]akam.net.
+[15]clearspring.com.
+ns1-33.[69]akam.net.
+[15]clearspring.com.
+ns1-43.[69]akam.net.
+[15]clearspring.com.
+eur2.[69]akam.net.
+[15]clearspring.com.
+usc1.[69]akam.net.
+csi.gstatic.com.
+csi.gstatic.com.
+[12]csi.gstatic.com.
+csi.l.google.com.
+[45]csi.l.google.com.
+[49]l.google.com.
+b.[49]l.google.com.
+[49]l.google.com.
+e.[49]l.google.com.
+[49]l.google.com.
+d.[49]l.google.com.
+[49]l.google.com.
+a.[49]l.google.com.
+[49]l.google.com.
+f.[49]l.google.com.
+[49]l.google.com.
+g.[49]l.google.com.
+[91]b.[49]l.google.com.
+[107]e.[49]l.google.com.
+[123]d.[49]l.google.com.
+[139]a.[49]l.google.com.
+[155]f.[49]l.google.com.
+[171]g.[49]l.google.com.
+www.gstatic.com.
+www.gstatic.com.
+[12]www.gstatic.com.
+www2.l.google.com.
+[45]www2.l.google.com.
+[50]l.google.com.
+d.[50]l.google.com.
+[50]l.google.com.
+f.[50]l.google.com.
+[50]l.google.com.
+e.[50]l.google.com.
+[50]l.google.com.
+b.[50]l.google.com.
+[50]l.google.com.
+a.[50]l.google.com.
+[50]l.google.com.
+g.[50]l.google.com.
+[92]d.[50]l.google.com.
+[108]f.[50]l.google.com.
+[124]e.[50]l.google.com.
+[140]b.[50]l.google.com.
+[156]a.[50]l.google.com.
+[172]g.[50]l.google.com.
+i.ytimg.com.
+i.ytimg.com.
+[12]i.ytimg.com.
+ytimg.l.google.com.
+[41]ytimg.l.google.com.
+[47]l.google.com.
+b.[47]l.google.com.
+[47]l.google.com.
+a.[47]l.google.com.
+[47]l.google.com.
+g.[47]l.google.com.
+[47]l.google.com.
+d.[47]l.google.com.
+[47]l.google.com.
+f.[47]l.google.com.
+[47]l.google.com.
+e.[47]l.google.com.
+[89]b.[47]l.google.com.
+[105]a.[47]l.google.com.
+[121]g.[47]l.google.com.
+[137]d.[47]l.google.com.
+[153]f.[47]l.google.com.
+[169]e.[47]l.google.com.
+news.bbc.co.uk.
+news.bbc.co.uk.
+[12]news.bbc.co.uk.
+newswww.bbc.net.uk.
+[44]newswww.bbc.net.uk.
+[52]bbc.net.uk.
+ns0.rbsov.bbc.co.[60]uk.
+[52]bbc.net.uk.
+ns0.thdo.[102]bbc.co.[60]uk.
+[92]ns0.rbsov.bbc.co.[60]uk.
+[123]ns0.thdo.[102]bbc.co.[60]uk.
+node1.bbcimg.co.uk.
+node1.bbcimg.co.uk.
+[12]node1.bbcimg.co.uk.
+img.bbc.net.uk.
+[48]img.bbc.net.uk.
+[52]bbc.net.uk.
+ns0.rbsov.bbc.co.[60]uk.
+[52]bbc.net.uk.
+ns0.thdo.[102]bbc.co.[60]uk.
+[92]ns0.rbsov.bbc.co.[60]uk.
+[123]ns0.thdo.[102]bbc.co.[60]uk.
+newsimg.bbc.co.uk.
+newsimg.bbc.co.uk.
+[12]newsimg.bbc.co.uk.
+newsimg.bbc.net.uk.
+[47]newsimg.bbc.net.uk.
+news.bbc.co.uk.edgesuite.net.
+[79]news.bbc.co.uk.edgesuite.net.
+a1733.g.akamai.[104]net.
+[121]a1733.g.akamai.[104]net.
+[121]a1733.g.akamai.[104]net.
+[127]g.akamai.[104]net.
+n7g.[129]akamai.[104]net.
+[127]g.akamai.[104]net.
+n6g.[129]akamai.[104]net.
+[127]g.akamai.[104]net.
+n8g.[129]akamai.[104]net.
+[127]g.akamai.[104]net.
+n1g.[129]akamai.[104]net.
+[127]g.akamai.[104]net.
+n3g.[129]akamai.[104]net.
+[127]g.akamai.[104]net.
+n0g.[129]akamai.[104]net.
+[127]g.akamai.[104]net.
+n4g.[129]akamai.[104]net.
+[127]g.akamai.[104]net.
+n2g.[129]akamai.[104]net.
+[127]g.akamai.[104]net.
+n5g.[129]akamai.[104]net.
+[182]n7g.[129]akamai.[104]net.
+[200]n6g.[129]akamai.[104]net.
+[218]n8g.[129]akamai.[104]net.
+[236]n1g.[129]akamai.[104]net.
+[254]n3g.[129]akamai.[104]net.
+[272]n0g.[129]akamai.[104]net.
+[290]n4g.[129]akamai.[104]net.
+[308]n2g.[129]akamai.[104]net.
+[326]n5g.[129]akamai.[104]net.
+stats.bbc.co.uk.
+stats.bbc.co.uk.
+[12]stats.bbc.co.uk.
+[12]stats.bbc.co.uk.
+bbc.co.uk.
+ns1.thls.[65]bbc.co.uk.
+[65]bbc.co.uk.
+ns1.thdo.[65]bbc.co.uk.
+[65]bbc.co.uk.
+ns1.rbsov.[65]bbc.co.uk.
+[65]bbc.co.uk.
+ns1.[65]bbc.co.uk.
+[86]ns1.thls.[65]bbc.co.uk.
+[109]ns1.thdo.[65]bbc.co.uk.
+[132]ns1.rbsov.[65]bbc.co.uk.
+[156]ns1.[65]bbc.co.uk.
+visualscience.external.bbc.co.uk.
+js.revsci.net.
+visualscience.external.bbc.co.uk.
+[12]visualscience.external.bbc.co.uk.
+csvtm.interactionscience.com.
+[62]csvtm.interactionscience.com.
+[68]interactionscience.com.
+ns1.sj1.omniture.[87]com.
+[68]interactionscience.com.
+ns1.sj2.[128]omniture.[87]com.
+[68]interactionscience.com.
+ns1.dal.[128]omniture.[87]com.
+[120]ns1.sj1.omniture.[87]com.
+[151]ns1.sj2.[128]omniture.[87]com.
+[173]ns1.dal.[128]omniture.[87]com.
+js.revsci.net.
+[12]js.revsci.net.
+[15]revsci.net.
+ns3.p16.dynect.[22]net.
+[15]revsci.net.
+ns4.[63]p16.dynect.[22]net.
+[15]revsci.net.
+ns2.[63]p16.dynect.[22]net.
+[15]revsci.net.
+ns1.[63]p16.dynect.[22]net.
+pix04.revsci.net.
+pix04.revsci.net.
+[12]pix04.revsci.net.
+[18]revsci.net.
+ns4.p16.dynect.[25]net.
+[18]revsci.net.
+ns1.[66]p16.dynect.[25]net.
+[18]revsci.net.
+ns3.[66]p16.dynect.[25]net.
+[18]revsci.net.
+ns2.[66]p16.dynect.[25]net.
+pixel.quantserve.com.
+pixel.quantserve.com.
+[12]pixel.quantserve.com.
+map-pb.quantserve.com.akadns.net.
+[50]map-pb.quantserve.com.akadns.net.
+ac-na.[57]quantserve.com.akadns.net.
+[96]ac-na.[57]quantserve.com.akadns.net.
+[96]ac-na.[57]quantserve.com.akadns.net.
+[96]ac-na.[57]quantserve.com.akadns.net.
+[96]ac-na.[57]quantserve.com.akadns.net.
+[96]ac-na.[57]quantserve.com.akadns.net.
+[96]ac-na.[57]quantserve.com.akadns.net.
+[96]ac-na.[57]quantserve.com.akadns.net.
+[72]akadns.net.
+za.akadns.org.
+[72]akadns.net.
+zb.[231]akadns.org.
+[72]akadns.net.
+zc.[231]akadns.org.
+[72]akadns.net.
+zd.[231]akadns.org.
+[72]akadns.net.
+asia9.[72]akadns.net.
+[72]akadns.net.
+eur1.[72]akadns.net.
+[72]akadns.net.
+use3.[72]akadns.net.
+[72]akadns.net.
+use4.[72]akadns.net.
+[72]akadns.net.
+usw2.[72]akadns.net.
+[228]za.akadns.org.
+[255]zb.[231]akadns.org.
+[272]zc.[231]akadns.org.
+[289]zd.[231]akadns.org.
+[306]asia9.[72]akadns.net.
+[326]eur1.[72]akadns.net.
+[345]use3.[72]akadns.net.
+www.vancouversun.com.
+www.vancouversun.com.
+[12]www.vancouversun.com.
+vancouversun.com.
+ns1.canwest.[67]com.
+[54]vancouversun.com.
+ns2.[86]canwest.[67]com.
+[82]ns1.canwest.[67]com.
+[108]ns2.[86]canwest.[67]com.
+www.scan.nowpublic.com.
+www.scan.nowpublic.com.
+[12]www.scan.nowpublic.com.
+a1.panthercdn.com.
+[52]a1.panthercdn.com.
+[55]panthercdn.com.
+ns1.[55]panthercdn.com.
+[55]panthercdn.com.
+ns2.[55]panthercdn.com.
+[99]ns1.[55]panthercdn.com.
+[117]ns2.[55]panthercdn.com.
+a123.g.akamai.net.
+a123.g.akamai.net.
+[12]a123.g.akamai.net.
+[12]a123.g.akamai.net.
+feeds.theplatform.com.
+canwestglobal.112.2o7.net.
+beacon.securestudies.com.
+canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+[12]canwestglobal.112.2o7.net.
+2o7.net.
+ns1.sj1.omniture.com.
+[155]2o7.net.
+ns1.sj2.[182]omniture.com.
+[155]2o7.net.
+ns1.dal.[182]omniture.com.
+[174]ns1.sj1.omniture.com.
+[208]ns1.sj2.[182]omniture.com.
+[230]ns1.dal.[182]omniture.com.
+beacon.securestudies.com.
+[12]beacon.securestudies.com.
+beacon.gta.securestudies.com.
+[54]beacon.gta.securestudies.com.
+[61]gta.securestudies.com.
+gta02.ord.[65]securestudies.com.
+[61]gta.securestudies.com.
+gta01.iad.[65]securestudies.com.
+[61]gta.securestudies.com.
+gta01.[118]ord.[65]securestudies.com.
+[61]gta.securestudies.com.
+gta02.[142]iad.[65]securestudies.com.
+[112]gta02.ord.[65]securestudies.com.
+[136]gta01.iad.[65]securestudies.com.
+[160]gta01.[118]ord.[65]securestudies.com.
+[180]gta02.[142]iad.[65]securestudies.com.
+feeds.theplatform.com.
+[12]feeds.theplatform.com.
+[18]theplatform.com.
+sea1tpgtm01.[18]theplatform.com.
+[18]theplatform.com.
+bfi1tpgtm01.[18]theplatform.com.
+[67]sea1tpgtm01.[18]theplatform.com.
+[93]bfi1tpgtm01.[18]theplatform.com.
+s7.addthis.com.
+s7.addthis.com.
+[12]s7.addthis.com.
+wildcard.addthis.com.edgekey.net.
+[44]wildcard.addthis.com.edgekey.net.
+e2943.c.akamaiedge.[73]net.
+[90]e2943.c.akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n5c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n8c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n0c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n6c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n1c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n4c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n3c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n2c.[98]akamaiedge.[73]net.
+[96]c.akamaiedge.[73]net.
+n7c.[98]akamaiedge.[73]net.
+[139]n5c.[98]akamaiedge.[73]net.
+[157]n8c.[98]akamaiedge.[73]net.
+[175]n0c.[98]akamaiedge.[73]net.
+[193]n6c.[98]akamaiedge.[73]net.
+[211]n1c.[98]akamaiedge.[73]net.
+[229]n4c.[98]akamaiedge.[73]net.
+[247]n3c.[98]akamaiedge.[73]net.
+[265]n2c.[98]akamaiedge.[73]net.
+[283]n7c.[98]akamaiedge.[73]net.
+ad.doubleclick.net.
+ad.doubleclick.net.
+[12]ad.doubleclick.net.
+dart-ad.l.doubleclick.net.
+[48]dart-ad.l.doubleclick.net.
+[48]dart-ad.l.doubleclick.net.
+[56]l.doubleclick.net.
+g.l.google.com.
+[56]l.doubleclick.net.
+a.[121]l.google.com.
+[56]l.doubleclick.net.
+b.[121]l.google.com.
+[56]l.doubleclick.net.
+d.[121]l.google.com.
+[56]l.doubleclick.net.
+e.[121]l.google.com.
+[56]l.doubleclick.net.
+f.[121]l.google.com.
+[119]g.l.google.com.
+[147]a.[121]l.google.com.
+[163]b.[121]l.google.com.
+[179]d.[121]l.google.com.
+[195]e.[121]l.google.com.
+[211]f.[121]l.google.com.
+*** SUCCESS ***
diff --git a/test/btscan1.lm b/test/btscan1.lm
new file mode 100644
index 0000000..558b890
--- /dev/null
+++ b/test/btscan1.lm
@@ -0,0 +1,47 @@
+##### LM #####
+#
+# R1
+#
+namespace r1
+
+ lex
+ literal `! `a `b
+ ignore /[ \n\t]+/
+ end
+
+ def line [ `! `a `b `b `a]
+
+end # r1
+
+#
+# R2
+#
+namespace r2
+
+ lex
+ literal `!
+ token id /[a-zA-Z_]+/
+ ignore /[ \n\t]+/
+ end
+
+ def line [ `! id ]
+
+end # r2
+
+def item
+ [r1::line]
+| [r2::line]
+
+def btscan
+ [item*]
+
+parse P: btscan[ stdin ]
+
+match P ~!abb !abba !aab
+print_xml(P)
+print( '\n' )
+##### IN #####
+!abb !abba !aab
+
+##### EXP #####
+<btscan><_repeat_item><item><r2::line><r2::_literal_0009>!</r2::_literal_0009><r2::id>abb</r2::id></r2::line></item><item><r1::line><r1::_literal_0001>!</r1::_literal_0001><r1::_literal_0003>a</r1::_literal_0003><r1::_literal_0005>b</r1::_literal_0005><r1::_literal_0005>b</r1::_literal_0005><r1::_literal_0003>a</r1::_literal_0003></r1::line></item><item><r2::line><r2::_literal_0009>!</r2::_literal_0009><r2::id>aab</r2::id></r2::line></item></_repeat_item></btscan>
diff --git a/test/btscan2.lm b/test/btscan2.lm
new file mode 100644
index 0000000..bfc77db
--- /dev/null
+++ b/test/btscan2.lm
@@ -0,0 +1,42 @@
+##### LM #####
+namespace r1
+
+ lex
+ literal `! `a `b
+ ignore /[ \n\t]+/
+ end
+
+ def line [ `! `a `b `b `a]
+
+end # r1
+
+namespace r2
+
+ lex
+ literal `!
+ token id /[a-zA-Z_]+/
+ ignore /[ \n\t]+/
+ end
+
+ def line [ `! id ]
+
+end # r2
+
+def item
+ [r1::line]
+| [r2::line]
+
+def btscan
+ [item*]
+
+cons Parser: parser<btscan> []
+
+send Parser "!ab"
+send Parser "b "
+send Parser "!ab"
+send Parser "ba !aab\n"
+
+print_xml( Parser() )
+print( '\n' )
+##### EXP #####
+<btscan><_repeat_item><item><r2::line><r2::_literal_0009>!</r2::_literal_0009><r2::id>abb</r2::id></r2::line></item><item><r1::line><r1::_literal_0001>!</r1::_literal_0001><r1::_literal_0003>a</r1::_literal_0003><r1::_literal_0005>b</r1::_literal_0005><r1::_literal_0005>b</r1::_literal_0005><r1::_literal_0003>a</r1::_literal_0003></r1::line></item><item><r2::line><r2::_literal_0009>!</r2::_literal_0009><r2::id>aab</r2::id></r2::line></item></_repeat_item></btscan>
diff --git a/test/call1.lm b/test/call1.lm
new file mode 100644
index 0000000..82f6c76
--- /dev/null
+++ b/test/call1.lm
@@ -0,0 +1,17 @@
+##### LM #####
+int f1( i: int j: int )
+{
+ return i + j
+}
+
+int main()
+{
+ print( f1(
+ f1( f1( 1 1 ) f1( 1 1 ) )
+ f1( f1( 1 1 ) f1( 1 1 ) )
+ ) '\n' )
+}
+
+main()
+##### EXP #####
+8
diff --git a/test/commitbt.lm b/test/commitbt.lm
new file mode 100644
index 0000000..da78807
--- /dev/null
+++ b/test/commitbt.lm
@@ -0,0 +1,109 @@
+##### LM #####
+# 2010: I'm not sure what the following means.
+
+#
+# Local commit:
+# -clears reparse flags underneath
+# -must be possible to backtrack after
+# Global commit (revertOn)
+# -clears all reparse flags
+# -must be possible to backtrack after
+# Global commit (!revertOn)
+# -clears all reparse flags
+# -clears all 'parsed' reverse code
+# -clears all reverse code
+# -clears all alg structures
+#
+
+# This test shows that a global commit with revertOn correctly does not clear
+# 'parsed' items because it must entertain the possibility of backtracking.
+
+lex
+ ignore /[\t\n ]+/
+ literal `^ `| `- `, `: `! `? `.
+ literal `( `) `{ `} `* `& `+
+
+ literal `-- `:> `:>> `<: `-> `**
+
+ token word /[a-zA-Z_][a-zA-Z0-9_]*/
+ token uint /[0-9]+/
+end
+
+
+def expression [term expression_op*]
+
+def expression_op
+ [`| term]
+| [`& term]
+| [`- term]
+| [`-- term]
+
+def term [factor_rep term_rest]
+
+# This list is done manually to get shortest match.
+def term_rest
+ []
+| [term_op term_rest]
+
+def term_op
+ [factor_rep]
+| [`. factor_rep]
+| [`:> factor_rep]
+| [`:>> factor_rep]
+| [`<: factor_rep]
+
+def factor_rep
+ [factor_neg factor_rep_op*]
+
+def factor_rep_op
+ [`*]
+| [`**]
+| [`?]
+| [`+]
+| [`{ factor_rep_num `}]
+| [`{ `, factor_rep_num `}]
+| [`{ factor_rep_num `, `}]
+| [`{ factor_rep_num `, factor_rep_num `}]
+
+def factor_rep_num [uint]
+
+def factor_neg
+ [`! factor_neg]
+| [`^ factor_neg]
+| [factor]
+
+def factor
+ [alphabet_num]
+| [word]
+| [`( expression `)]
+
+def alphabet_num
+ [uint]
+
+def suint
+ i: int
+ [uint]
+
+def sub
+ [suint* `*]
+
+token item
+ S: sub
+ /[0-9]+/
+ {
+ M: str = input.pull(match_length)
+ parse_stop S: sub[input]
+ input.push( make_token( typeid<item> M S ) )
+ }
+
+def stuff
+ [item* `!]
+| [sub]
+
+parse S: stuff[ stdin ]
+print_xml( S )
+print( '\n' )
+##### IN #####
+1 2 3 * !
+##### EXP #####
+<stuff><_repeat_item><item>1</item></_repeat_item><_literal_000d>!</_literal_000d></stuff>
diff --git a/test/concat1.lm b/test/concat1.lm
new file mode 100644
index 0000000..ee92409
--- /dev/null
+++ b/test/concat1.lm
@@ -0,0 +1,100 @@
+##### LM #####
+
+lex
+ literal `type `include
+ token id /[A-Za-z_][A-Za-z_0-9]*/
+ ignore /'#' [^\n]* '\n'/
+ ignore /[ \t\r\n]+/
+end
+
+lex
+ token ifn_part /[a-zA-Z0-9_.\-]+/
+ token ifn_slash /'/'/
+end
+
+def ifn_path_part
+ [ifn_part]
+| [ifn_slash]
+
+def ifn_path
+ [ifn_path_part ifn_path]
+| [ifn_path_part]
+
+
+literal `%%
+
+lex
+ token em_ws /( any - 33..126 )+/
+end
+
+def em_item
+ [em_ws]
+
+def prelude
+ [em_item* `%%]
+
+def item
+ [`include ifn_path]
+| [`type id]
+
+def start
+ [prelude item*]
+
+start parseStart( InputFile: stream )
+{
+ return parse start[ InputFile ]
+}
+
+start parseTxt( T: str )
+{
+ cons a: parser<start>[]
+ send a [T] eos
+ return a.tree
+}
+
+item* concatItems( IL1: item* IL2: item* )
+{
+ for IL: item* in IL1 {
+ if match IL [] {
+ IL = IL2
+ break
+ }
+ }
+ return IL1
+}
+
+item* expandIncludes( ItemList: ref<item*> )
+{
+ for IL: item* in ItemList {
+ if match IL
+ [`include FN: ifn_path Rest: item*]
+ {
+ S: start = parseTxt(
+ "
+ "%%
+ "
+ )
+
+ match S [em_item* `%% IncludedItems: item*]
+
+ IL = concatItems( IncludedItems Rest )
+ }
+ }
+}
+
+int main()
+{
+ S: start = parseStart(stdin)
+ match S [em_item* `%% ItemList: item*]
+ expandIncludes( ItemList )
+}
+
+main()
+##### IN #####
+
+%%
+
+type foo
+
+include smtp.vpt
+##### EXP #####
diff --git a/test/concat2.lm b/test/concat2.lm
new file mode 100644
index 0000000..781c2f2
--- /dev/null
+++ b/test/concat2.lm
@@ -0,0 +1,98 @@
+##### LM #####
+
+lex
+ literal `type `include
+ token id /[A-Za-z_][A-Za-z_0-9]*/
+ ignore /'#' [^\n]* '\n'/
+ ignore /[ \t\r\n]+/
+end
+
+lex
+ token ifn_part /[a-zA-Z0-9_.\-]+/
+ token ifn_slash /'/'/
+end
+
+def ifn_path_part
+ [ifn_part]
+| [ifn_slash]
+
+def ifn_path
+ [ifn_path_part ifn_path]
+| [ifn_path_part]
+
+
+literal `%%
+
+lex
+ token em_ws /( any - 33..126 )+/
+end
+
+def em_item
+ [em_ws]
+
+def prelude
+ [em_item* `%%]
+
+def item
+ [`include ifn_path]
+| [`type id]
+
+def start
+ [prelude item*]
+
+start parseStart( InputFile: stream )
+{
+ return parse start[ InputFile ]
+}
+
+start parseTxt( T: str )
+{
+ cons a: accum<start>[]
+ send a [T] eos
+ return a.tree
+}
+
+item* concatItems( IL1: item* IL2: item* )
+{
+ for IL: item* in IL1 {
+ if match IL [] {
+ IL = IL2
+ break
+ }
+ }
+ return IL1
+}
+
+item* expandIncludes( ItemList: ref<item*> )
+{
+ for IL: item* in ItemList {
+ if match IL
+ [`include FN: ifn_path Rest: item*]
+ {
+ S: start = parseTxt(
+ "
+ "%%
+ "
+ )
+
+ match S [em_item* `%% IncludedItems: item*]
+
+ IL = concatItems( IncludedItems Rest )
+ }
+ }
+}
+
+int main()
+{
+ S: start = parseStart(stdin)
+ match S [em_item* `%% ItemList: item*]
+ expandIncludes( ItemList )
+}
+
+main()
+##### IN #####
+
+%%
+
+include smtp.vpt
+##### EXP #####
diff --git a/test/construct1.lm b/test/construct1.lm
new file mode 100644
index 0000000..ee9b36b
--- /dev/null
+++ b/test/construct1.lm
@@ -0,0 +1,19 @@
+##### LM #####
+rl ident_pattern /[a-zA-Z_][a-zA-Z_0-9]*/
+rl number_pattern /[0-9]+/
+
+lex
+ ignore /[ \t\n]+/
+ token id /ident_pattern/
+ token number /number_pattern/
+end
+
+def four_ids
+ [id id id id]
+
+Constructed: four_ids = construct four_ids "a b c d"
+print_xml( Constructed )
+print( '\n' )
+
+##### EXP #####
+<four_ids><id>a</id><id>b</id><id>c</id><id>d</id></four_ids>
diff --git a/test/construct2.lm b/test/construct2.lm
new file mode 100644
index 0000000..fd60e9d
--- /dev/null
+++ b/test/construct2.lm
@@ -0,0 +1,14 @@
+##### LM #####
+
+lex
+ ignore /[ \t\n]+/
+ token id /[a-z]+/
+ literal `, `. `* `( `)
+end
+
+def lang [id*]
+
+print( construct lang "a b c" '\n' )
+
+##### EXP #####
+a b c
diff --git a/test/construct3.lm b/test/construct3.lm
new file mode 100644
index 0000000..8edb038
--- /dev/null
+++ b/test/construct3.lm
@@ -0,0 +1,19 @@
+##### LM #####
+
+lex
+ ignore /[ \t\n]+/
+ token id /[a-z0-9]+/
+ literal `, `. `* `( `)
+end
+
+def bigger [`( item* `)]
+
+def item [id] | [bigger]
+
+def lang [item*]
+
+B: bigger = construct bigger "( b1 b2 )"
+print( construct lang "a [B] c" '\n' )
+
+##### EXP #####
+a ( b1 b2 ) c
diff --git a/test/constructex.lm b/test/constructex.lm
new file mode 100644
index 0000000..3596363
--- /dev/null
+++ b/test/constructex.lm
@@ -0,0 +1,44 @@
+##### LM #####
+lex
+ token id /[a-zA-Z_][a-zA-Z0-9_]*/
+ literal `= `< `> `/
+ ignore /[ \t\n\r\v]+/
+end
+
+def attr
+ [id `= id]
+
+def open_tag
+ [`< id attr* `>]
+
+def close_tag
+ [`< `/ id `>]
+
+def tag
+ [open_tag item* close_tag]
+
+def item
+ [tag]
+| [id]
+
+parse PersonTag: tag[ stdin ]
+
+match PersonTag
+ ["<person name=" Val:id attr*">" item* "</person>"]
+
+NameTag1: tag = construct tag
+ ["<name type=person>" ^Val "</name>"]
+
+NameTag2: tag = construct tag
+ "<name type=person>[^Val]</name>"
+
+print( NameTag1 '\n' )
+print( NameTag2 '\n' )
+
+##### IN #####
+<person name=adrian hometown=kingston>
+ <t1 foo=bar2 e=f></t2>
+</person>
+##### EXP #####
+<name type=person>adrian</name>
+<name type=person>adrian</name>
diff --git a/test/context1.lm b/test/context1.lm
new file mode 100644
index 0000000..2832045
--- /dev/null
+++ b/test/context1.lm
@@ -0,0 +1,39 @@
+##### LM #####
+
+context ctx
+ i: int
+ j: int
+ k: int
+
+ lex
+ ignore /space+/
+ literal `* `( `)
+ token id /[a-zA-Z_]+/
+ end
+
+ def foo [id]
+
+ def item
+ [id]
+ | [foo]
+ | [`( item* `)]
+ {
+ i = 0
+ j = i + 1
+ k = j + 1
+ print( k '\n' )
+ }
+
+ def start
+ [item*]
+end # ctx
+
+cons CTX: ctx[]
+parse Input: ctx::start( CTX ) [ stdin ]
+print( Input )
+
+##### IN #####
+a b c ( d e f )
+##### EXP #####
+2
+a b c ( d e f )
diff --git a/test/context2.lm b/test/context2.lm
new file mode 100644
index 0000000..e04354a
--- /dev/null
+++ b/test/context2.lm
@@ -0,0 +1,124 @@
+##### LM #####
+context ruby_here
+
+ rl ident_pattern /[a-zA-Z_][a-zA-Z_0-9]*/
+ rl number_pattern /[0-9]+/
+
+ lex
+ ignore /[ \t\n]+/
+ token id /ident_pattern/
+ token number /number_pattern/
+ literal `<< `* `, `( `) `!
+ end
+
+ HereId: str
+
+ token rest_of_line /[^\n]*'\n'/
+
+ lex
+ ignore /[ \t\n]+/
+ token here_id
+ HereData: here_data
+ /ident_pattern/
+ {
+ # Take the text of the here_id from the input stream.
+ HereId = input.pull( match_length )
+
+ # Get the data up to the rest of the line.
+ parse_stop ROL: rest_of_line(ctx)[ input ]
+
+ # Parse the heredoc data.
+ parse_stop HereData: here_data(ctx)[ input ]
+
+ # Push the rest-of-line data back to the input stream.
+ input.push( $ROL )
+
+ # Send the here_id token. Attach the heredoc data as an attribute.
+ input.push( make_token( typeid<here_id> HereId HereData ) )
+ }
+ end
+
+ lex
+ token here_close_id
+ / ident_pattern '\n' /
+ {
+ if match_text == HereId + '\n' {
+ input.push( make_token(
+ typeid<here_close_id>
+ input.pull( match_length ) ) )
+ }
+ else
+ input.push( make_token( typeid<here_line> input.pull(match_length) ) )
+ }
+
+ token here_line
+ / [^\n]* '\n' /
+ end
+
+ def here_data
+ [here_line* here_close_id]
+
+ def heredoc
+ [`<< here_id]
+
+ def primary
+ [id]
+ | [number]
+ | [heredoc]
+
+ def arglist
+ [primary arglist_more*]
+
+ def arglist_more
+ [`, primary]
+
+ def call
+ [id `( arglist? `)]
+
+ def statement
+ [primary]
+ | [call]
+
+ token foobar /any+/
+
+ def item
+ [statement `!]
+ | [foobar]
+
+ def start
+ [item*]
+
+end # ruby_here
+
+CTX: ruby_here = cons ruby_here []
+
+parse S: ruby_here::start( CTX ) [ stdin ]
+print_xml(S)
+print('\n')
+##### IN #####
+print( <<DATA1, more, <<DATA2, 99 )
+"&^#(@ almost
+!arbitrary text!
+DATA1
+hello
+world
+DATA2
+!
+print( <<DATA1, more, <<DATA2, 99 )
+"&^#(@ almost
+!arbitrary text!
+DATA1
+hello
+world
+DATA2
+# error here
+##### EXP #####
+<ruby_here::start><ruby_here::_repeat_item><ruby_here::item><ruby_here::statement><ruby_here::call><ruby_here::id>print</ruby_here::id><ruby_here::_literal_000d>(</ruby_here::_literal_000d><ruby_here::_opt_arglist><ruby_here::arglist><ruby_here::primary><ruby_here::heredoc><ruby_here::_literal_0007>&lt;&lt;</ruby_here::_literal_0007><ruby_here::here_id>DATA1</ruby_here::here_id></ruby_here::heredoc></ruby_here::primary><ruby_here::_repeat_arglist_more><ruby_here::arglist_more><ruby_here::_literal_000b>,</ruby_here::_literal_000b><ruby_here::primary><ruby_here::id>more</ruby_here::id></ruby_here::primary></ruby_here::arglist_more><ruby_here::arglist_more><ruby_here::_literal_000b>,</ruby_here::_literal_000b><ruby_here::primary><ruby_here::heredoc><ruby_here::_literal_0007>&lt;&lt;</ruby_here::_literal_0007><ruby_here::here_id>DATA2</ruby_here::here_id></ruby_here::heredoc></ruby_here::primary></ruby_here::arglist_more><ruby_here::arglist_more><ruby_here::_literal_000b>,</ruby_here::_literal_000b><ruby_here::primary><ruby_here::number>99</ruby_here::number></ruby_here::primary></ruby_here::arglist_more></ruby_here::_repeat_arglist_more></ruby_here::arglist></ruby_here::_opt_arglist><ruby_here::_literal_000f>)</ruby_here::_literal_000f></ruby_here::call></ruby_here::statement><ruby_here::_literal_0011>!</ruby_here::_literal_0011></ruby_here::item><ruby_here::item><ruby_here::foobar>print( &lt;&lt;DATA1, more, &lt;&lt;DATA2, 99 )
+"&amp;^#(@ almost
+!arbitrary text!
+DATA1
+hello
+world
+DATA2
+# error here
+</ruby_here::foobar></ruby_here::item></ruby_here::_repeat_item></ruby_here::start>
diff --git a/test/context3.lm b/test/context3.lm
new file mode 100644
index 0000000..f990837
--- /dev/null
+++ b/test/context3.lm
@@ -0,0 +1,47 @@
+##### LM #####
+context ctx
+
+ i: int
+ j: int
+ k: int
+
+ lex
+ ignore /space+/
+ literal `* `( `)
+ token id /[a-zA-Z_]+/
+ end
+
+ def foo [id]
+
+ int f()
+ {
+ i = i + 1
+ }
+
+ def item
+ [id]
+ | [foo]
+ | [`( item* `)]
+ {
+ i = 0
+ f()
+ f()
+ f()
+ print( i '\n' )
+ }
+
+
+ def start
+ [item*]
+
+end # ctx
+
+CTX: ctx = cons ctx []
+parse Input: ctx::start( CTX ) [stdin]
+print( Input )
+
+##### IN #####
+a b c ( d ) e f
+##### EXP #####
+3
+a b c ( d ) e f
diff --git a/test/counting1.lm b/test/counting1.lm
new file mode 100644
index 0000000..83e70d5
--- /dev/null
+++ b/test/counting1.lm
@@ -0,0 +1,109 @@
+##### LM #####
+context counting
+
+ #
+ # Regular Definitions
+ #
+ rl rl_ws /[ \t\n\r\v]+/
+ rl rl_id /[a-zA-Z_][a-zA-Z0-9_]*/
+ rl rl_num /[0-9]+/
+
+ #
+ # Tokens
+ #
+
+ lex
+ # Ignore whitespace.
+ ignore /rl_ws/
+
+ # Tokens.
+ token id /rl_id/
+ token number /rl_num/
+ end
+
+ #
+ # Global Data
+ #
+
+ target: int
+
+ #
+ # Productions
+ #
+
+
+ def get_target
+ [number]
+ {
+ match lhs [Number:number]
+ target = Number.data.atoi()
+ }
+
+ # Arbitrary item.
+ def item
+ [number]
+ | [id]
+
+ # Type definition for the count_items nonterminal.
+ def count_items
+ count: int
+
+ # List production one. The condition stops the
+ # greedy list when it has gone too far.
+ [count_items item]
+ {
+ # Pass up the data
+ lhs.count = r1.count + 1
+ if lhs.count > target {
+ reject
+ }
+ }
+
+ # List production two, the base.
+ | []
+ {
+ lhs.count = 0
+ }
+
+ # Wrapper which prevents short lists from getting through if the parser
+ # encounters an error and needs to backtrack over counted list.
+ def counted_list
+ [get_target count_items]
+ {
+ if r2.count < target {
+ reject
+ }
+ }
+
+ def start
+ [counted_list*]
+ {
+ for List:counted_list in lhs {
+ match List [Count:number Items:count_items]
+ print( 'num items: ' Count.data.atoi() '\n' )
+
+ i: int = 1
+ for Item:item in Items {
+ print( ' item ' i ': ' ^Item '\n' )
+ i = i + 1
+ }
+ }
+ }
+end # counting
+
+cons Counting: counting[]
+parse counting::start(Counting)[ stdin ]
+##### IN #####
+3 1 b c 1 1 0 3 a b c
+##### EXP #####
+num items: 3
+ item 1: 1
+ item 2: b
+ item 3: c
+num items: 1
+ item 1: 1
+num items: 0
+num items: 3
+ item 1: a
+ item 2: b
+ item 3: c
diff --git a/test/counting2.lm b/test/counting2.lm
new file mode 100644
index 0000000..0ca75be
--- /dev/null
+++ b/test/counting2.lm
@@ -0,0 +1,98 @@
+##### LM #####
+
+#
+# Regular Definitions
+#
+
+rl rl_ws /[ \t\n\r\v]+/
+rl rl_id /[a-zA-Z_][a-zA-Z0-9_]*/
+rl rl_num /[0-9]+/
+
+#
+# Tokens
+#
+
+lex
+ # Ignore whitespace.
+ ignore /rl_ws/
+
+ # Tokens.
+ token id /rl_id/
+ token number /rl_num/
+end
+
+#
+# Productions
+#
+
+# Arbitrary item.
+def item
+ [id]
+| [number]
+
+# List production one. The condition stops the
+# greedy list when it has gone too far.
+def count_items
+ target: int
+ count: int
+
+ [count_items item]
+ {
+ # Pass up the data
+ lhs.target = r1.target
+ lhs.count = r1.count + 1
+
+ if lhs.count > lhs.target {
+ reject
+ }
+ }
+
+ # List production two, the base.
+| [number]
+ {
+ match lhs [Number: number]
+ lhs.target = Number.data.atoi()
+ lhs.count = 0
+ }
+
+
+# Wrapper which prevents short lists from getting through if the parser
+# encounters an error and needs to backtrack over counted list.
+def counted_list
+ [count_items]
+ {
+ if r1.count < r1.target {
+ reject
+ }
+ }
+
+def start
+ [counted_list*]
+ {
+ for List: counted_list in lhs {
+ match List [CountItems:count_items]
+ print( 'num items: ' CountItems.target '\n' )
+
+ i: int = 1
+ for Item:item in CountItems {
+ print( ' item ' i ': ' ^Item '\n' )
+ i = i + 1
+ }
+ }
+ }
+
+parse start[ stdin ]
+##### IN #####
+3 1 b c 1 1 0 3 a b c
+##### EXP #####
+num items: 3
+ item 1: 1
+ item 2: b
+ item 3: c
+num items: 1
+ item 1: 1
+num items: 0
+num items: 3
+ item 1: a
+ item 2: b
+ item 3: c
diff --git a/test/counting3.lm b/test/counting3.lm
new file mode 100644
index 0000000..027f456
--- /dev/null
+++ b/test/counting3.lm
@@ -0,0 +1,130 @@
+##### LM #####
+context counting
+
+ #
+ # Regular Definitions
+ #
+ rl rl_ws /[ \t\n\r\v]+/
+ rl rl_id /[a-zA-Z_][a-zA-Z0-9_]*/
+ rl rl_num /[0-9]+/
+
+ #
+ # Tokens
+ #
+
+ lex
+ # Ignore whitespace.
+ ignore /rl_ws/
+
+ literal `;
+
+ # Tokens.
+ token id /rl_id/
+ token number /rl_num/
+ end
+
+ #
+ # Global Data
+ #
+
+ target: int
+ count: int
+
+ #
+ # Productions
+ #
+
+ def get_target
+ [number]
+ {
+ count = 0
+ target = r1.data.atoi()
+ print( 'target: ' target '\n' )
+ }
+
+ # Arbitrary item.
+ def item
+ [number]
+ | [id]
+
+ def count_items
+ [one_item count_items]
+ | []
+
+ def one_item
+ [item]
+ {
+ count = count + 1
+ if count > target {
+ reject
+ }
+ print( 'ITEM\n' )
+ }
+
+
+ # Wrapper which prevents short lists from getting through if the parser
+ # encounters an error and needs to backtrack over counted list.
+ def counted_list
+ [get_target count_items]
+ {
+ print( 'trying: ' count ' for: ' target '\n' )
+ if count < target {
+ reject
+ }
+ }
+
+
+ def start
+ [counted_list*]
+ {
+
+ for List: counted_list in lhs {
+ match List [Count: number Items: count_items]
+ print( 'num items: ' Count.data.atoi() '\n' )
+
+ i: int = 1
+ for Item: item in Items {
+ print( ' item ' i ': ' ^Item '\n' )
+ i = i + 1
+ }
+ }
+ print( '*** SUCCESS ***\n' )
+ }
+
+end # counting
+
+cons Counting: counting[]
+parse counting::start(Counting)[ stdin ]
+##### IN #####
+3 1 b c 1 1 0 3 a b c
+##### EXP #####
+target: 3
+ITEM
+ITEM
+ITEM
+ITEM
+trying: 3 for: 3
+target: 1
+ITEM
+ITEM
+trying: 1 for: 1
+target: 0
+ITEM
+trying: 0 for: 0
+target: 3
+ITEM
+ITEM
+ITEM
+trying: 3 for: 3
+num items: 3
+ item 1: 1
+ item 2: b
+ item 3: c
+num items: 1
+ item 1: 1
+num items: 0
+num items: 3
+ item 1: a
+ item 2: b
+ item 3: c
+*** SUCCESS ***
diff --git a/test/counting4.lm b/test/counting4.lm
new file mode 100644
index 0000000..ef9f87f
--- /dev/null
+++ b/test/counting4.lm
@@ -0,0 +1,111 @@
+##### LM #####
+context counting
+
+ #
+ # Regular Definitions
+ #
+ rl rl_ws /[ \t\n\r\v]+/
+ rl rl_id /[a-zA-Z_][a-zA-Z0-9_]*/
+ rl rl_num /[0-9]+/
+
+ #
+ # Tokens
+ #
+
+ lex
+ # Ignore whitespace.
+ ignore /rl_ws/
+
+ literal `;
+
+ # Tokens.
+ token id /rl_id/
+ token number /rl_num/
+ end
+
+ #
+ # Global Data
+ #
+
+ target: int
+ count: int
+
+ #
+ # Productions
+ #
+
+
+ def get_target
+ [number]
+ {
+ count = 0
+ target = r1.data.atoi()
+ print( 'target: ' target '\n' )
+ }
+
+ # Arbitrary item.
+ def item
+ [number]
+ | [id]
+
+ def count_items
+ [count_inc item count_items]
+ | [count_end]
+
+ def count_inc
+ []
+ {
+ if count < target
+ count = count + 1
+ else
+ reject
+ }
+
+ def count_end
+ []
+ {
+ if count < target
+ reject
+ }
+
+ def counted_list
+ [get_target count_items]
+
+ def start
+ [counted_list*]
+ {
+ for List: counted_list in lhs {
+ match List [Count: number Items: count_items]
+ print( 'num items: ' Count.data.atoi() '\n' )
+
+ i: int = 1
+ for Item: item in Items {
+ print( ' item ' i ': ' ^Item '\n' )
+ i = i + 1
+ }
+ }
+ print( '*** SUCCESS ***\n' )
+ }
+end # counting
+
+cons Counting: counting[]
+parse counting::start(Counting)[stdin]
+##### IN #####
+3 1 b c 1 1 0 3 a b c
+##### EXP #####
+target: 3
+target: 1
+target: 0
+target: 3
+num items: 3
+ item 1: 1
+ item 2: b
+ item 3: c
+num items: 1
+ item 1: 1
+num items: 0
+num items: 3
+ item 1: a
+ item 2: b
+ item 3: c
+*** SUCCESS ***
diff --git a/test/decl1.lm b/test/decl1.lm
new file mode 100644
index 0000000..2d3c03b
--- /dev/null
+++ b/test/decl1.lm
@@ -0,0 +1,5 @@
+##### LM #####
+Int: int = 7
+print( Int '\n' )
+##### EXP #####
+7
diff --git a/test/decl2.lm b/test/decl2.lm
new file mode 100644
index 0000000..79fdc67
--- /dev/null
+++ b/test/decl2.lm
@@ -0,0 +1,5 @@
+##### LM #####
+Str: str = '77'
+print( Str '\n' )
+##### EXP #####
+77
diff --git a/test/decl3.lm b/test/decl3.lm
new file mode 100644
index 0000000..1c9ef23
--- /dev/null
+++ b/test/decl3.lm
@@ -0,0 +1,3 @@
+##### LM #####
+Int: int
+##### EXP #####
diff --git a/test/div.lm b/test/div.lm
new file mode 100644
index 0000000..84dd807
--- /dev/null
+++ b/test/div.lm
@@ -0,0 +1,42 @@
+##### LM #####
+
+i: int = 0
+while ( i < 34 ) {
+ print( (i / 4) '\n' )
+ i = i + 1
+}
+##### EXP #####
+0
+0
+0
+0
+1
+1
+1
+1
+2
+2
+2
+2
+3
+3
+3
+3
+4
+4
+4
+4
+5
+5
+5
+5
+6
+6
+6
+6
+7
+7
+7
+7
+8
+8
diff --git a/test/exit1.lm b/test/exit1.lm
new file mode 100644
index 0000000..1a4f82a
--- /dev/null
+++ b/test/exit1.lm
@@ -0,0 +1,7 @@
+##### LM #####
+
+print( 'before\n' )
+exit( 0 )
+print( 'after\n' )
+##### EXP #####
+before
diff --git a/test/exit2.lm b/test/exit2.lm
new file mode 100644
index 0000000..2105f98
--- /dev/null
+++ b/test/exit2.lm
@@ -0,0 +1,24 @@
+##### LM #####
+
+int f3()
+{
+ I: int = 1
+ exit( 0 )
+}
+
+int f2()
+{
+ I: int = 1
+ f3()
+}
+
+int f1()
+{
+ I: int = 1
+ f2()
+}
+
+I: int = 1
+
+f1()
+##### EXP #####
diff --git a/test/exit3.lm b/test/exit3.lm
new file mode 100644
index 0000000..4cf4686
--- /dev/null
+++ b/test/exit3.lm
@@ -0,0 +1,24 @@
+##### LM #####
+
+int f3()
+{
+ I: int = 1
+ print( "hello\n" )
+}
+
+int f2()
+{
+ I: int = 1
+ f3()
+}
+
+int f1()
+{
+ I: int = 1
+ f2()
+}
+
+I: int = 1
+
+exit( 0 )
+##### EXP #####
diff --git a/test/export1.lm b/test/export1.lm
new file mode 100644
index 0000000..253b688
--- /dev/null
+++ b/test/export1.lm
@@ -0,0 +1,16 @@
+##### LM #####
+lex
+ token id /[a-z]+/
+ ignore /[ \t]+/
+end
+
+def start
+ [id*]
+
+export Start: start
+export Error: str
+
+parse P: start[ stdin ]
+Error = error
+##### IN #####
+##### EXP #####
diff --git a/test/factor1.lm b/test/factor1.lm
new file mode 100644
index 0000000..74c7453
--- /dev/null
+++ b/test/factor1.lm
@@ -0,0 +1,4 @@
+##### LM #####
+print( 'hello\n')
+##### EXP #####
+hello
diff --git a/test/factor2.lm b/test/factor2.lm
new file mode 100644
index 0000000..a368537
--- /dev/null
+++ b/test/factor2.lm
@@ -0,0 +1,4 @@
+##### LM #####
+print( 77 '\n' )
+##### EXP #####
+77
diff --git a/test/factor3.lm b/test/factor3.lm
new file mode 100644
index 0000000..cc988af
--- /dev/null
+++ b/test/factor3.lm
@@ -0,0 +1,3 @@
+##### LM #####
+open( 'x' ('r') )
+##### EXP #####
diff --git a/test/factor4.lm b/test/factor4.lm
new file mode 100644
index 0000000..f0143ea
--- /dev/null
+++ b/test/factor4.lm
@@ -0,0 +1,3 @@
+##### LM #####
+argv
+##### EXP #####
diff --git a/test/factor5.lm b/test/factor5.lm
new file mode 100644
index 0000000..e2be8c0
--- /dev/null
+++ b/test/factor5.lm
@@ -0,0 +1,6 @@
+##### LM #####
+print( argv.pop() '\n' )
+##### ARGS #####
+a
+##### EXP #####
+a
diff --git a/test/factor6.lm b/test/factor6.lm
new file mode 100644
index 0000000..a9c0c19
--- /dev/null
+++ b/test/factor6.lm
@@ -0,0 +1,12 @@
+##### LM #####
+if ( 1 )
+ print( 'a\n' )
+if ( nil )
+ print( 'b\n' )
+if ( true )
+ print( 'c\n' )
+if ( false )
+ print( 'd\n' )
+##### EXP #####
+a
+c
diff --git a/test/forloop1.lm b/test/forloop1.lm
new file mode 100644
index 0000000..31e37fd
--- /dev/null
+++ b/test/forloop1.lm
@@ -0,0 +1,19 @@
+##### LM #####
+lex
+ token id / 'a' .. 'z' /
+ ignore / '\n' | '\t' | ' ' /
+end
+
+def start
+ [id*]
+
+parse P: start[stdin]
+Start: start = P
+for Id: id in Start
+ print( ^Id '\n' )
+##### IN #####
+a b c
+##### EXP #####
+a
+b
+c
diff --git a/test/forloop2.lm b/test/forloop2.lm
new file mode 100644
index 0000000..7d54f8a
--- /dev/null
+++ b/test/forloop2.lm
@@ -0,0 +1,19 @@
+##### LM #####
+lex
+ token id / 'a' .. 'z' /
+ ignore / '\n' | '\t' | ' ' /
+end
+
+def start
+ [id*]
+
+parse P: start[stdin]
+Start: start = P
+for Id: id in triter(Start)
+ print( ^Id '\n' )
+##### IN #####
+d e f
+##### EXP #####
+d
+e
+f
diff --git a/test/forloop3.lm b/test/forloop3.lm
new file mode 100644
index 0000000..3749244
--- /dev/null
+++ b/test/forloop3.lm
@@ -0,0 +1,21 @@
+##### LM #####
+lex
+ token id / 'a' .. 'z' /
+ ignore / '\n' | '\t' | ' ' /
+end
+
+def start
+ [id*]
+
+parse P: start[stdin]
+Start: start = P
+for Id: id in triter(Start) {
+ print( ^Id )
+ print( '\n' )
+}
+##### IN #####
+d e f
+##### EXP #####
+d
+e
+f
diff --git a/test/func1.lm b/test/func1.lm
new file mode 100644
index 0000000..c973e44
--- /dev/null
+++ b/test/func1.lm
@@ -0,0 +1,9 @@
+##### LM #####
+int f()
+{
+ print( 'hello world\n' )
+}
+
+f()
+##### EXP #####
+hello world
diff --git a/test/func2.lm b/test/func2.lm
new file mode 100644
index 0000000..75de72d
--- /dev/null
+++ b/test/func2.lm
@@ -0,0 +1,9 @@
+##### LM #####
+int f( I: int Str: str )
+{
+ print( I ' ' Str '\n' )
+}
+
+f( 50 'hello world' )
+##### EXP #####
+50 hello world
diff --git a/test/func3.lm b/test/func3.lm
new file mode 100644
index 0000000..cdfcbc3
--- /dev/null
+++ b/test/func3.lm
@@ -0,0 +1,40 @@
+##### LM #####
+lex
+ literal `{ `}
+ literal `struct `type
+ token id /[A-Za-z_][A-Za-z_0-9]*/
+ ignore /[ \t\r\n]+/
+end
+
+def attribute
+ [`type id]
+
+def struct
+ [`struct id `{ attribute* `}]
+
+def program
+ [struct*]
+
+int func( P: program )
+{
+ print( P )
+}
+
+int main()
+{
+ parse P: program[ stdin ]
+ func( P )
+}
+
+main()
+
+##### IN #####
+struct S
+{
+ type T
+}
+##### EXP #####
+struct S
+{
+ type T
+}
diff --git a/test/generate1.lm b/test/generate1.lm
new file mode 100644
index 0000000..ef76f8d
--- /dev/null
+++ b/test/generate1.lm
@@ -0,0 +1,759 @@
+##### LM #####
+context generate
+ # Regular definitions
+ rl ident_char /[a-zA-Z_]/
+
+ # List used as a stack of indentations.
+ IndentStack: list<int>
+
+ # Has a newline been sent for this '\n' .. whitespace match.
+ newline_sent: int
+
+ # Tokens.
+ lex
+ # Python keywords.
+ literal `and `del `from `not `while `as `elif `global `or
+ `with `assert `else `if `pass `yield `break `except
+ `import `print `class `exec `in `raise `continue
+ `finally `is `return `def `for `lambda `try
+
+ # Identifiers
+ rl lowercase /'a'..'z'/
+ rl uppercase /'A'..'Z'/
+ rl letter /lowercase | uppercase/
+ token identifier /(letter|'_') (letter | digit | '_')*/
+
+ # Literals
+ rl escapeseq /'\\' any /
+ rl longstringchar /[^\\]/
+ rl shortstringchar_s /[^\\\n']/
+ rl shortstringchar_d /[^\\\n"]/
+ rl longstringitem /longstringchar | escapeseq/
+ rl shortstringitem_s /shortstringchar_s | escapeseq/
+ rl shortstringitem_d /shortstringchar_d | escapeseq/
+ rl longstring /"'''" longstringitem* :>> "'''" | '"""' longstringitem* :>> '"""'/
+ rl shortstring /"'" shortstringitem_s* "'" | '"' shortstringitem_d* '"'/
+ rl stringprefix /"r" | "u" | "ur" | "R" | "U" | "UR" | "Ur" | "uR"/
+ token stringliteral /stringprefix? (shortstring | longstring)/
+
+ # Integers
+ rl hexdigit /digit | 'a'..'f' | 'A'..'F'/
+ rl octdigit /'0'..'7'/
+ rl nonzerodigit /'1'..'9'/
+ rl hexinteger /'0' ('x' | 'X') hexdigit+/
+ rl octinteger /'0' octdigit+/
+ rl decimalinteger /nonzerodigit digit* | '0'/
+ token integer /decimalinteger | octinteger | hexinteger/
+ token longinteger /integer ('l' | 'L')/
+
+ # Floats.
+ rl exponent /('e' | 'E') ('+' | '-')? digit+/
+ rl fraction /'.' digit+/
+ rl intpart /digit+/
+ rl pointfloat /intpart? fraction | intpart '.'/
+ rl exponentfloat /(intpart | pointfloat) exponent/
+ token floatnumber /pointfloat | exponentfloat/
+
+ # Imaginaries.
+ token imagnumber /(floatnumber | intpart) ("j" | "J")/
+
+ # Operators.
+ literal `+ `- `* `** `/ `// `% `<< `>> `& `| `^
+ `~ `< `> `<= `>= `== `!= `<>
+
+ # Delimiters
+ literal `( `) `[ `] `{ `} `@ `, `: `. `` `= `;
+ `+= `-= `*= `/= `//= `%= `&= `|= `^= `>>= `<<=
+ `**=
+
+ literal `...
+
+ # In general whitespace is ignored.
+ ignore WS /' '+/
+
+ # Find and ignore entire blank lines.
+ token BLANK_LINE
+ / '\n' [ \t]* ('#' [^\n]*)? '\n' /
+ {
+ # Need to shorten to take off the newline.
+ # Turn it into ignore.
+ input.push_ignore( make_token( typeid<WS> input.pull(match_length - 1) ) )
+ }
+
+ # Find and ignore comments.
+ token COMMENT
+ / '#' [^\n]* '\n' /
+ {
+ # Need to shorten to take off the newline. Turn it into ignore.
+ input.push_ignore( make_token( typeid<WS> input.pull(match_length - 1) ) )
+ }
+
+ # These tokens are generated
+ token INDENT //
+ token DEDENT //
+ token NEWLINE //
+ ignore IND_WS //
+
+ token INDENTATION
+ /'\n' [ \t]*/
+ {
+ # We have squared up INDENTs and DEDENTs. Ignore the entire match.
+ input.push_ignore( make_token( typeid<WS> input.pull(match_length) ) )
+
+ # We have already sent the newline, compute the indentation level.
+ data_length: int = match_length - 1
+
+ if data_length > IndentStack.top {
+ # The indentation level is more than the level on the top
+ # of the stack. This is an indent event. Send as an INDENT.
+ input.push( make_token( typeid<INDENT> '' ) )
+
+ # Push to the stack as per python manual.
+ IndentStack.push( data_length )
+ } else {
+ while data_length < IndentStack.top {
+ # The indentation level is less than the level on the top of
+ # the stack. Pop the level and send one dedent. This flow of
+ # control will execute until we find the right indentation level
+ # to match up with.
+ IndentStack.pop()
+
+ # Send as a DEDENT
+ input.push( make_token( typeid<DEDENT> '' ) )
+ }
+ }
+
+ # FIXME: if data.length is now > top of stack then error. This
+ # means the outdent does not match anything.
+
+ # First the newline.
+ input.push( make_token( typeid<NEWLINE> '' ) )
+ }
+ end
+
+ # Blank lines or comment lines at the beginning of the file.
+ token LEADER / ( [ \t]* ('#' [^\n]*)? '\n' )* /
+
+ def start
+ [file_input]
+
+ def file_input
+ [file_input_forms*]
+
+ def file_input_forms
+ [statement]
+ | [NEWLINE]
+
+ def statement
+ [stmt_list NEWLINE]
+ | [compound_stmt]
+
+ def stmt_list
+ [simple_stmt another_stmt* opt_semi]
+
+ def another_stmt
+ [`; simple_stmt]
+
+ def opt_semi
+ [`;]
+ | []
+
+ def suite
+ [stmt_list NEWLINE]
+ | [NEWLINE INDENT statement_seq DEDENT]
+
+ def statement_seq
+ [statement_seq statement]
+ | [statement]
+
+ def compound_stmt
+ [if_stmt]
+ | [while_stmt]
+ | [for_stmt]
+ | [funcdef]
+
+ def if_stmt
+ [`if expression `: suite elif_part* opt_else_part]
+
+ def elif_part
+ [`elif expression `: suite]
+
+ def opt_else_part
+ [`else `: suite]
+ | []
+
+ def while_stmt
+ [`while expression `: suite opt_else_part]
+
+ def for_stmt
+ [`for target_list `in expression_list `: suite opt_else_part]
+
+ def funcdef
+ [`def funcname `( opt_parameter_list `) `: suite]
+
+ def funcname
+ [identifier]
+
+ def dotted_name
+ [dotted_name `. identifier]
+ | [identifier]
+
+ def opt_parameter_list
+ [parameter_list]
+ | []
+
+ def parameter_list
+ [defparameter_list defparameter opt_comma]
+
+ def defparameter_list
+ [defparameter_list defparameter `,]
+ | []
+
+ def defparameter
+ [parameter]
+ | [parameter `= expression]
+
+ def sublist
+ [sublist_pl opt_comma]
+
+ def sublist_pl
+ [sublist_pl `, parameter]
+ | [parameter]
+
+ def parameter
+ [identifier]
+ | [`( sublist `)]
+
+ def classname
+ [identifier]
+
+ def simple_stmt
+ [expression_stmt]
+ | [assignment_stmt]
+ | [print_stmt]
+
+ def expression_stmt
+ [expression_list]
+
+ def assignment_stmt
+ [target_equals_list expression_list]
+
+ def target_equals_list
+ [target_equals_list target_equals]
+ | [target_equals]
+
+ def target_equals
+ [target_list `=]
+
+ def target_list
+ [target_list_core opt_comma]
+
+ def target_list_core
+ [target_list_core `, target]
+ | [target]
+
+ def target
+ [target_atom target_ext_rep]
+
+ def target_atom
+ [identifier]
+ | [`( target_list `)]
+ | [`[ target_list `]]
+
+ def target_ext_rep
+ [target_ext target_ext_rep]
+ | []
+
+ def target_ext
+ [attributeref]
+ | [subscription]
+ | [slicing]
+
+ def print_stmt
+ [`print opt_expression_list]
+
+ def opt_expression_list
+ [expression_list]
+ | []
+
+ def expression_list
+ [expression_list_core opt_comma]
+
+ def expression_list_core
+ [expression_list_core `, expression]
+ | [expression]
+
+ def opt_comma
+ [`,]
+ | []
+
+ def expression
+ [or_test `if or_test `else test]
+ | [or_test]
+ | [lambda_form]
+
+ def or_test
+ [or_test `or and_test]
+ | [and_test]
+
+ def and_test
+ [and_test `and not_test]
+ | [not_test]
+
+ def not_test
+ [comparison]
+ | [`not not_test]
+
+ def lambda_form
+ [`lambda opt_parameter_list `: expression]
+
+ def test
+ [or_test]
+ | [lambda_form]
+
+ def comparison
+ [or_expr comparison_part*]
+
+ def comparison_part
+ [comp_operator or_expr]
+
+ def comp_operator
+ [`<] | [`>] | [`==] | [`>=] | [`<=] | [`<>] | [`!=] | [`is] |
+ [`is `not] | [`in] | [`not `in]
+
+ def or_expr
+ [primary]
+
+ def primary
+ [atom primary_ext_rep]
+
+ def atom
+ [identifier]
+ | [pyliteral]
+ | [enclosure]
+
+ def primary_ext_rep
+ [primary_ext primary_ext_rep]
+ | []
+
+ def primary_ext
+ [attributeref]
+ | [subscription]
+ | [slicing]
+ | [call]
+
+ def pyliteral
+ [stringliteral]
+ | [integer]
+ | [longinteger]
+ | [floatnumber]
+ | [imagnumber]
+
+ def enclosure
+ [parenth_form]
+ | [list_display]
+ | [generator_expression]
+ | [dict_display]
+ | [string_conversion]
+
+ def parenth_form
+ [`( opt_expression_list `)]
+
+ def list_display
+ [`[ opt_listmaker `]]
+
+ def opt_listmaker
+ [listmaker]
+ | []
+
+ def listmaker
+ [expression list_for]
+ | [expression listmaker_ext* opt_comma]
+
+ def listmaker_ext
+ [`, expression]
+
+ def opt_list_iter
+ [list_iter]
+ | []
+
+ def list_iter
+ [list_for]
+ | [list_if]
+
+ def list_if
+ [`if test opt_list_iter]
+
+ def list_for
+ [`for expression_list `in testlist opt_list_iter]
+
+ def testlist
+ [test testlist_ext* opt_comma]
+
+ def testlist_ext
+ [`, test ]
+
+ def generator_expression
+ [`( test genexpr_for `)]
+
+ def genexpr_for
+ [`for expression_list `in test opt_genexpr_iter]
+
+ def opt_genexpr_iter
+ [genexpr_iter]
+ | []
+
+ def genexpr_iter
+ [genexpr_for]
+ | [genexpr_if]
+
+ def genexpr_if
+ [`if test opt_genexpr_iter]
+
+ def dict_display
+ [`{ opt_key_datum_list `}]
+
+ def opt_key_datum_list
+ [key_datum_list]
+ | []
+
+ def key_datum_list
+ [key_datum key_datum_list_ext* opt_comma]
+
+ def key_datum_list_ext
+ [`, key_datum]
+
+ def key_datum
+ [expression `: expression]
+
+ def string_conversion
+ [`` expression_list ``]
+
+ def attributeref
+ [`. identifier]
+
+ def subscription
+ [`[ expression_list `]]
+
+ # The natural ordered choice does not suffice here. Must force it.
+
+ def slicing
+ [simple_slicing]
+ | [extended_slicing]
+
+ def simple_slicing
+ [`[ short_slice `]]
+
+ def extended_slicing
+ [`[ slice_list `]]
+
+ def slice_list
+ [slice_item slice_list_ext* opt_comma]
+
+ def slice_list_ext
+ [`, slice_item]
+
+ def slice_item
+ [expression]
+ | [proper_slice]
+ | [ellipsis]
+
+ def proper_slice
+ [short_slice]
+ | [long_slice]
+
+ def short_slice
+ [`:]
+ | [`: upper_bound]
+ | [lower_bound `:]
+ | [lower_bound `: upper_bound]
+
+ def long_slice
+ [short_slice `: stride]
+ | [short_slice `:]
+
+ def lower_bound
+ [expression]
+
+ def upper_bound
+ [expression]
+
+ def stride
+ [expression]
+
+ def ellipsis
+ [`...]
+
+ def call
+ [`( opt_argument_list `)]
+
+ def opt_argument_list
+ [argument_list opt_comma]
+ | []
+
+ def argument_list
+ [positional_arguments opt_comma_keyword_arguments]
+ | [keyword_arguments]
+
+ def positional_arguments
+ [positional_arguments `, expression]
+ | [expression]
+
+ def opt_comma_keyword_arguments
+ [`, keyword_arguments]
+ | []
+
+ def keyword_arguments
+ [keyword_arguments `, keyword_item]
+ | [keyword_item]
+
+ def keyword_item
+ [identifier `= expression]
+
+end # generate
+
+int print_stmts( S: generate::start )
+{
+ for Stmt: generate::statement in S
+ print( 'STMT: ' ^Stmt '\n' )
+}
+
+int print_target_subscriptions_and_slicings( Start: generate::start )
+{
+ for TI: generate::target_ext in Start {
+ if match TI [generate::subscription] {
+ print( 'TARGET SUBSCRIPTION: ' ^TI '\n' )
+ }
+
+ if match TI [generate::simple_slicing] {
+ print( 'TARGET SIMPLE SLICING: ' ^TI '\n' )
+ }
+
+ if match TI [generate::extended_slicing] {
+ print( 'TARGET EXTENDED SLICING: ' ^TI '\n' )
+ }
+ }
+
+}
+
+int print_primary_subscriptions_and_slicings( Start: generate::start )
+{
+ for PI: generate::primary_ext in Start {
+ if match PI [generate::subscription] {
+ print( 'PRIMARY SUBSCRIPTION: ' ^PI '\n' )
+ }
+
+ if match PI [generate::simple_slicing] {
+ print( 'PRIMARY SIMPLE SLICING: ' ^PI '\n' )
+ }
+
+ if match PI [generate::extended_slicing] {
+ print( 'PRIMARY EXTENDED SLICING: ' ^PI '\n' )
+ }
+ }
+}
+
+cons Generate: generate[]
+
+# List used as a stack of indentations.
+Generate.IndentStack = cons list<int> []
+Generate.IndentStack.push( 0 )
+
+# Has a newline been sent for this '\n' .. whitespace match.
+Generate.newline_sent = 0
+
+parse S: generate::start(Generate)[ stdin ]
+
+print( '*** SUCCESS ***\n' )
+print( ^S '\n' )
+print( '***\n' )
+print_stmts( S )
+print_target_subscriptions_and_slicings( S )
+print_primary_subscriptions_and_slicings( S )
+print( '*** SUCCESS ***\n' )
+##### IN #####
+
+# dude, this is a comment
+ # some more
+hello
+def dude():
+ yes
+ awesome;
+
+ # Here we have a comment
+ def realy_awesome(): # hi there
+ in_more
+
+ same_level
+ def one_liner(): first; second # both inside one_liner
+
+ back_down
+
+last_statement
+
+# dude, this is a comment
+ # some more
+hello
+if 1:
+ yes
+ awesome;
+
+ # Here we have a comment
+ if ('hello'): # hi there
+ in_more
+
+ same_level
+ if ['dude', 'dudess'].horsie(): first; second # both inside one_liner
+ 1
+
+ back_down
+
+last_statement
+
+hello = 1.1(20);
+
+# subscription
+a[1] = b[2];
+
+# simple slicing
+c[1:1] = d[2:2];
+
+# simple slicing
+e[1:1, 2:2] = f[3:3, 4:4];
+##### EXP #####
+*** SUCCESS ***
+
+hello
+def dude():
+ yes
+ awesome;
+
+ # Here we have a comment
+ def realy_awesome(): # hi there
+ in_more
+
+ same_level
+ def one_liner(): first; second # both inside one_liner
+
+ back_down
+
+last_statement
+
+# dude, this is a comment
+ # some more
+hello
+if 1:
+ yes
+ awesome;
+
+ # Here we have a comment
+ if ('hello'): # hi there
+ in_more
+
+ same_level
+ if ['dude', 'dudess'].horsie(): first; second # both inside one_liner
+ 1
+
+ back_down
+
+last_statement
+
+hello = 1.1(20);
+
+# subscription
+a[1] = b[2];
+
+# simple slicing
+c[1:1] = d[2:2];
+
+# simple slicing
+e[1:1, 2:2] = f[3:3, 4:4];
+***
+STMT: hello
+STMT: def dude():
+ yes
+ awesome;
+
+ # Here we have a comment
+ def realy_awesome(): # hi there
+ in_more
+
+ same_level
+ def one_liner(): first; second # both inside one_liner
+
+ back_down
+
+STMT: yes
+STMT: awesome;
+
+ # Here we have a comment
+STMT: def realy_awesome(): # hi there
+ in_more
+
+ same_level
+ def one_liner(): first; second # both inside one_liner
+
+STMT: in_more
+
+STMT: same_level
+STMT: def one_liner(): first; second # both inside one_liner
+
+STMT: back_down
+
+STMT: last_statement
+
+# dude, this is a comment
+ # some more
+STMT: hello
+STMT: if 1:
+ yes
+ awesome;
+
+ # Here we have a comment
+ if ('hello'): # hi there
+ in_more
+
+ same_level
+ if ['dude', 'dudess'].horsie(): first; second # both inside one_liner
+ 1
+
+ back_down
+
+STMT: yes
+STMT: awesome;
+
+ # Here we have a comment
+STMT: if ('hello'): # hi there
+ in_more
+
+ same_level
+ if ['dude', 'dudess'].horsie(): first; second # both inside one_liner
+ 1
+
+STMT: in_more
+
+STMT: same_level
+STMT: if ['dude', 'dudess'].horsie(): first; second # both inside one_liner
+STMT: 1
+
+STMT: back_down
+
+STMT: last_statement
+
+STMT: hello = 1.1(20);
+
+# subscription
+STMT: a[1] = b[2];
+
+# simple slicing
+STMT: c[1:1] = d[2:2];
+
+# simple slicing
+STMT: e[1:1, 2:2] = f[3:3, 4:4];
+TARGET SUBSCRIPTION: [1]
+TARGET SIMPLE SLICING: [1:1]
+TARGET EXTENDED SLICING: [1:1, 2:2]
+PRIMARY SUBSCRIPTION: [2]
+PRIMARY SIMPLE SLICING: [2:2]
+PRIMARY EXTENDED SLICING: [3:3, 4:4]
+*** SUCCESS ***
diff --git a/test/generate2.lm b/test/generate2.lm
new file mode 100644
index 0000000..946c478
--- /dev/null
+++ b/test/generate2.lm
@@ -0,0 +1,214 @@
+##### LM #####
+context generate
+ def open_item
+ type: str
+ num: int
+ []
+
+ OpenStack: list<open_item>
+
+ lex
+ token stray_close //
+
+ token ocurly /'{'+/
+ {
+ input.pull( match_length )
+
+ cons OI: open_item( '{' match_length ) []
+ OpenStack.push( OI )
+ i: int = 0
+ while ( i < match_length ) {
+ input.push( make_token( typeid<ocurly> '{' ) )
+ i = i + 1
+ }
+ }
+
+ token ccurly1 //
+ token ccurly2 //
+ token ccurly3 //
+ token missing_curly //
+
+ token tmp1 /'}'+/
+ {
+ if OpenStack.length > 0 && OpenStack.tail.type == '{' {
+ length: int = 3
+ if ( length > match_length )
+ length = match_length
+
+ Tail: open_item = OpenStack.pop()
+ if ( length > Tail.num )
+ length = Tail.num
+
+ if ( length == 1 )
+ input.push( make_token( typeid<ccurly1> input.pull( 1 ) ) )
+ else if ( length == 2 )
+ input.push( make_token( typeid<ccurly2> input.pull( 2 ) ) )
+ else if ( length == 3 )
+ input.push( make_token( typeid<ccurly3> input.pull( 3 ) ) )
+
+ Tail.num = Tail.num - length
+
+ if ( Tail.num > 0 )
+ OpenStack.push( Tail )
+ }
+ else {
+ input.push( make_token( typeid<stray_close> input.pull( match_length ) ) )
+ }
+ }
+
+ token osquare /'['+/
+ {
+ input.pull( match_length )
+ OI: open_item = construct open_item( '[' match_length ) []
+ OpenStack.push( OI )
+ i: int = 0
+ while ( i < match_length ) {
+ input.push( make_token( typeid<osquare> '[' ) )
+ i = i + 1
+ }
+ }
+
+ token csquare1 //
+ token csquare2 //
+ token missing_square //
+
+ token tmp2 /']'+/
+ {
+ if OpenStack.length > 0 && OpenStack.tail.type == '[' {
+ length: int = 2
+ if ( length > match_length )
+ length = match_length
+
+ Tail: open_item = OpenStack.pop()
+ if ( length > Tail.num )
+ length = Tail.num
+
+ if ( length == 1 )
+ input.push( make_token( typeid<csquare1> input.pull( 1 ) ) )
+ else if ( length == 2 )
+ input.push( make_token( typeid<csquare2> input.pull( 2 ) ) )
+
+ Tail.num = Tail.num - length
+
+ if ( Tail.num > 0 )
+ OpenStack.push( Tail )
+ }
+ else {
+ input.push( make_token( typeid<stray_close> input.pull( match_length ) ) )
+ }
+ }
+
+ literal `|
+ token char /any/
+
+ preeof {
+ while ( OpenStack.length > 0 ) {
+ Tail: open_item = OpenStack.pop()
+ i: int
+ if ( Tail.type == '{' ) {
+ i = 0
+ while ( i < Tail.num ) {
+ input.push( make_token( typeid<missing_curly> '}' ) )
+ i = i + 1
+ }
+ }
+ else if ( Tail.type == '[' ) {
+ i = 0
+ while ( i < Tail.num ) {
+ input.push( make_token( typeid<missing_square> ']' ) )
+ i = i + 1
+ }
+ }
+ }
+ }
+ end
+
+ #
+ # Internal Links
+ #
+
+
+ lex
+ literal `http:
+ literal `ftp:
+ literal `mailto:
+ end
+
+ def el_prefix
+ [`http:]
+ | [`ftp:]
+ | [`mailto:]
+
+ def external_link
+ [osquare item* csquare1]
+
+ def internal_link
+ [osquare osquare item* csquare2]
+
+ def unclosed_square
+ [osquare item* missing_square]
+
+ #
+ # Templates
+ #
+
+ def sing_template
+ [ocurly item* ccurly1]
+
+ def template
+ [ocurly ocurly item* ccurly2]
+
+ def parameter
+ [ocurly ocurly ocurly item* ccurly3]
+
+ def unclosed_curly
+ [ocurly item* missing_curly]
+
+ #
+ # Template Parameters
+ #
+
+
+ def U1 []
+ def U2 []
+ def U3 []
+
+ def item
+ [external_link]
+ | [internal_link]
+ | [unclosed_curly]
+ | [sing_template]
+ | [template]
+ | [parameter]
+ | [unclosed_curly]
+ | [stray_close]
+ | [osquare]
+ | [`|]
+ | [char]
+
+ def start
+ [item*]
+
+end # generate
+
+cons Generate: generate[]
+
+Generate.OpenStack = construct list<generate::open_item> []
+Sentinal: generate::open_item = construct generate::open_item( '** SENTINAL **' 1 ) []
+Generate.OpenStack.push( Sentinal )
+
+parse S: generate::start(Generate)[stdin]
+
+if S {
+ for I: generate::external_link in S
+ print( 'EXTERNAL LINK: ' I '\n' )
+
+ for I: generate::internal_link in S
+ print( 'INTERNAL LINK: ' I '\n' )
+}
+##### IN #####
+[external]
+[[internal]]
+##### EXP #####
+EXTERNAL LINK: [external]
+INTERNAL LINK: [[internal]]
diff --git a/test/heredoc.lm b/test/heredoc.lm
new file mode 100644
index 0000000..05bf300
--- /dev/null
+++ b/test/heredoc.lm
@@ -0,0 +1,59 @@
+##### LM #####
+context heredoc
+ rl ident_char /[a-zA-Z_]/
+
+ lex
+ # Tokens
+ token other /(^(ident_char|0|'\n'))+/
+
+ token here_close //
+ token id
+ /ident_char+/
+ {
+ if HereId && HereId == match_text {
+ input.push( make_token(
+ typeid<here_close>
+ input.pull(match_length - 1) ) )
+ }
+ else {
+ input.push( make_token( typeid<id> input.pull(match_length) ) )
+ }
+ }
+
+ token nl /'\n'/
+ end
+
+ def here_name
+ [id]
+ {
+ HereId = $r1
+ }
+
+ HereId: str
+
+ def here_data
+ [here_data_item*]
+
+ def here_data_item
+ [id]
+ | [other]
+ | [nl]
+
+ def start
+ [here_name here_data here_close id nl]
+end # heredoc
+
+cons HereDoc: heredoc[]
+
+parse S: heredoc::start(HereDoc)[stdin]
+print_xml(S)
+print( '\n' )
+##### IN #####
+hello
+random 9392af j9 stuff
+hello
+##### EXP #####
+<heredoc::start><heredoc::here_name><heredoc::id>hello</heredoc::id></heredoc::here_name><heredoc::here_data><heredoc::_repeat_here_data_item><heredoc::here_data_item><heredoc::nl>
+</heredoc::nl></heredoc::here_data_item><heredoc::here_data_item><heredoc::id>random</heredoc::id></heredoc::here_data_item><heredoc::here_data_item><heredoc::other> 9392</heredoc::other></heredoc::here_data_item><heredoc::here_data_item><heredoc::id>af</heredoc::id></heredoc::here_data_item><heredoc::here_data_item><heredoc::other> </heredoc::other></heredoc::here_data_item><heredoc::here_data_item><heredoc::id>j</heredoc::id></heredoc::here_data_item><heredoc::here_data_item><heredoc::other>9 </heredoc::other></heredoc::here_data_item><heredoc::here_data_item><heredoc::id>stuff</heredoc::id></heredoc::here_data_item><heredoc::here_data_item><heredoc::nl>
+</heredoc::nl></heredoc::here_data_item></heredoc::_repeat_here_data_item></heredoc::here_data><heredoc::here_close>hell</heredoc::here_close><heredoc::id>o</heredoc::id><heredoc::nl>
+</heredoc::nl></heredoc::start>
diff --git a/test/ifblock1.lm b/test/ifblock1.lm
new file mode 100644
index 0000000..3797529
--- /dev/null
+++ b/test/ifblock1.lm
@@ -0,0 +1,46 @@
+##### LM #####
+if 1
+ print( '1\n' )
+
+if 2 {
+ print( '2\n' )
+}
+
+if 3 {
+ print( '3\n' )
+ print( '4\n' )
+}
+
+if 0
+ print( '0\n' )
+elsif 0
+ print( '0\n' )
+
+if 0
+ print( '0\n' )
+elsif 1
+ print( '5\n' )
+
+if 0
+ print( '0\n' )
+elsif 0
+ print( '0\n' )
+elsif 1
+ print( '6\n' )
+
+if 0
+ print( '0\n' )
+elsif 0
+ print( '0\n' )
+elsif 0
+ print( '0\n' )
+else
+ print( '7\n' )
+##### EXP #####
+1
+2
+3
+4
+5
+6
+7
diff --git a/test/ignore1.lm b/test/ignore1.lm
new file mode 100644
index 0000000..47f631c
--- /dev/null
+++ b/test/ignore1.lm
@@ -0,0 +1,59 @@
+##### LM #####
+
+#
+# Regular Definitions
+#
+rl rl_ws /[.+ \t\n\r\v]+/
+rl rl_id /[a-zA-Z_][a-zA-Z0-9_]*/
+
+#
+# Tokens
+#
+
+lex
+ literal `= `< `> `/
+
+ # Ignore whitespace.
+ ignore /rl_ws/
+
+ # Open and close id
+ token id /rl_id/
+end
+
+#
+# Productions
+#
+
+def attr [id `= id]
+
+def attr_list
+ [attr_list attr]
+| []
+
+def open_tag
+ [`< id attr_list `>]
+
+def close_tag
+ [`< `/ id `>]
+
+def tag
+ [open_tag item_list close_tag]
+
+def item_list
+ [item_list tag]
+| []
+
+parse Attrs: attr_list[ stdin ]
+
+print( Attrs )
+
+construct IL: item_list
+ ["<wrapper .[Attrs]. ></wrapper>\n"]
+
+print( IL )
+##### IN #####
++ foo = asdf +
+##### EXP #####
++ foo = asdf +
+<wrapper .+ foo = asdf +
+. ></wrapper>
diff --git a/test/ignore2.lm b/test/ignore2.lm
new file mode 100644
index 0000000..f4aa963
--- /dev/null
+++ b/test/ignore2.lm
@@ -0,0 +1,36 @@
+##### LM #####
+lex
+ ignore /space+/
+ literal `* `( `)
+ token id /[a-zA-Z_]+/
+end
+
+def item
+ [id]
+| [`( item* `)]
+
+def start
+ [item*]
+
+parse Input: start[ stdin ]
+
+cons Output: accum<start> []
+
+for Id: id in Input {
+ send Output
+ "( [^Id] )
+}
+
+S: start = Output()
+
+print( S )
+##### IN #####
+a b c ( chocolate fudge ) d e
+##### EXP #####
+( a )
+( b )
+( c )
+( chocolate )
+( fudge )
+( d )
+( e )
diff --git a/test/ignore3.lm b/test/ignore3.lm
new file mode 100644
index 0000000..ccf9c13
--- /dev/null
+++ b/test/ignore3.lm
@@ -0,0 +1,53 @@
+##### LM #####
+lex
+ ignore /space+/
+ literal `*
+ literal `( -ni
+ literal ni- `)
+ literal `! `;
+ token id /[a-zA-Z_0-9]+/
+end
+
+lex
+ ignore /space+/
+ token inner_t /[a-zA-Z_0-9]+/
+ token empty -
+end
+
+def inner
+ [inner_t*]
+| [empty]
+
+def item
+ [id]
+| [`( inner `)]
+
+def start
+ [item* `;]
+
+parse Start: start[ stdin ]
+
+if ( ! Start ) {
+ print( 'parse error\n' )
+ exit( 0 )
+}
+
+for I: item in Start {
+ print( 'item: .' I '.\n' )
+ if match I [ O: `( Inner: inner C: `) ]
+ print( 'innr: .' O '.' Inner '.' C '.\n' )
+}
+
+##### IN #####
+a b c ( d ) e ( ) f g;
+##### EXP #####
+item: .a .
+item: .b .
+item: .c .
+item: .( d ) .
+innr: .(. d .) .
+item: .e .
+item: .( ) .
+innr: .(. .) .
+item: .f .
+item: .g.
diff --git a/test/ignore4.lm b/test/ignore4.lm
new file mode 100644
index 0000000..cdd94b3
--- /dev/null
+++ b/test/ignore4.lm
@@ -0,0 +1,74 @@
+##### LM #####
+namespace hash
+
+ lex
+ literal `define `include
+ token NL /'\n'/ -ni
+
+ token id /[a-zA-Z_][a-zA-Z_0-9]*/
+ token number /[0-9]+/
+ token string /'"' ( [^"\\] | '\\' any )* '"'/
+
+ ignore /[ \t]+/
+
+ end
+
+ def hash
+ [`define id number NL]
+ | [`include string NL]
+
+end # hash
+
+namespace lang
+
+ lex
+ ignore /space+/
+ literal `* `( `) `; `#
+ token id /[a-zA-Z_][a-zA-Z_0-9]*/
+ token number /[0-9]+/
+ end
+
+ def item
+ [id]
+ | [`( item* `)]
+
+ def statement
+ [item* `;]
+ | [`# hash::hash]
+
+ def start
+ [statement*]
+
+end # lang
+
+parse Input: lang::start[ stdin ]
+
+if ! Input
+ print( error '\n' )
+else {
+ #print( Input.tree '\n' )
+ for H: lang::statement in Input {
+ require H [ lang::`# hash::hash ]
+ print( '--' H '==\n' )
+ }
+}
+##### IN #####
+
+hello;
+
+#include "input1"
+
+#include "input2"
+
+#include "input3"
+
+there;
+##### EXP #####
+--#include "input1"
+==
+--
+#include "input2"
+==
+--
+#include "input3"
+==
diff --git a/test/ignore5.lm b/test/ignore5.lm
new file mode 100644
index 0000000..28a3392
--- /dev/null
+++ b/test/ignore5.lm
@@ -0,0 +1,51 @@
+##### LM #####
+lex
+ ignore /space+/
+ literal `* `! `;
+ literal `( -ni ni- `)
+ token id /[a-zA-Z_0-9]+/
+end
+
+lex
+ ignore /space+/
+ token inner_t /[a-zA-Z_0-9]+/
+
+ token empty -
+end
+
+def inner
+ [empty inner_t*]
+
+def item
+ [id]
+| [`( inner `)]
+
+def start
+ [item* `;]
+
+parse Start: start[ stdin ]
+
+if ( ! Start ) {
+ print( 'parse error\n' )
+ exit( 0 )
+}
+
+for I: item in Start {
+ print( 'item: .' I '.\n' )
+ if match I [ O: `( Inner: inner C: `) ]
+ print( 'innr: .' O '.' Inner '.' C '.\n' )
+}
+
+##### IN #####
+a b c ( d ) e ( ) f g;
+##### EXP #####
+item: .a .
+item: .b .
+item: .c .
+item: .( d ) .
+innr: .(. d .) .
+item: .e .
+item: .( ) .
+innr: .(. .) .
+item: .f .
+item: .g.
diff --git a/test/include1.lm b/test/include1.lm
new file mode 100644
index 0000000..efd29ba
--- /dev/null
+++ b/test/include1.lm
@@ -0,0 +1,5 @@
+##### LM #####
+include 'include1a.lmi'
+print( ' world\n' )
+##### EXP #####
+hello world
diff --git a/test/include1a.lmi b/test/include1a.lmi
new file mode 100644
index 0000000..5de6f69
--- /dev/null
+++ b/test/include1a.lmi
@@ -0,0 +1,2 @@
+
+print( 'hello' )
diff --git a/test/inpush1.lm b/test/inpush1.lm
new file mode 100644
index 0000000..9f7be15
--- /dev/null
+++ b/test/inpush1.lm
@@ -0,0 +1,134 @@
+##### LM #####
+namespace string
+ lex
+ literal `"
+ token data /[^"\\]+/
+ token escape /'\\' any/
+ end
+
+ def string_data
+ [data]
+ | [escape]
+
+ def string
+ [`" string_data* `"]
+
+ str unquote( S: string )
+ {
+ match S [`" DL: string_data* `"]
+ for E: escape in DL
+ E.data = 'x'
+ return $DL
+ }
+
+end # string
+
+namespace hash
+
+ lex
+ literal `define `include
+ literal `#
+ token NL /'\n'/ -ni
+
+ token id /[a-zA-Z_][a-zA-Z_0-9]*/
+ token number /[0-9]+/
+
+ ignore /[ \t]/
+ end
+
+ def hash
+ [`# `define Id: id number NL]
+ | [`# `include Inc: string::string NL]
+
+end # hash
+
+token rest_of_line /[^\n]* '\n'/
+
+namespace lang
+
+ lex
+ ignore /space/
+ literal `* `( `) `;
+ token id /[a-zA-Z_][a-zA-Z_0-9]*/
+ token number /[0-9]+/
+
+ token hash /'#'/ {
+ parse_stop H: hash::hash[ input ]
+ if ( H ) {
+ if ( H.Inc ) {
+ FN: str = unquote( H.Inc )
+ print( 'opening ' FN '\n' )
+ IS: stream = open( FN 'r' )
+ if ( ! IS ) {
+ print( 'ERROR: failed to open ' FN '\n' )
+ exit(1)
+ }
+ input.push( IS )
+ }
+ }
+ else {
+ parse_stop L: rest_of_line[ input ]
+ if ! L {
+ print( "ERROR: stuck: " error )
+ exit(1)
+ }
+ print( "ERROR: failed to parse # directive: " L )
+ }
+ }
+ end
+
+ def item
+ [id]
+ | [`( item* `)]
+
+ def statement
+ [item* `;]
+
+ def start
+ [statement*]
+
+end # lang
+
+parse Input: lang::start[ stdin ]
+
+if ! Input
+ print( error '\n' )
+else {
+ print( Input )
+}
+##### IN #####
+
+hello;
+
+#include "inpush1a.in"
+
+there;
+
+#include "inpush1b.in"
+
+dude;
+
+#include "inpush1c.in"
+
+and dudettes;
+##### EXP #####
+opening inpush1a.in
+opening inpush1b.in
+opening inpush1c.in
+
+hello;
+
+a;
+b;
+
+there;
+
+c;
+d;
+
+dude;
+
+e;
+f;
+
+and dudettes;
diff --git a/test/inpush1a.in b/test/inpush1a.in
new file mode 100644
index 0000000..26da0af
--- /dev/null
+++ b/test/inpush1a.in
@@ -0,0 +1,2 @@
+a;
+b;
diff --git a/test/inpush1b.in b/test/inpush1b.in
new file mode 100644
index 0000000..6c57432
--- /dev/null
+++ b/test/inpush1b.in
@@ -0,0 +1,2 @@
+c;
+d;
diff --git a/test/inpush1c.in b/test/inpush1c.in
new file mode 100644
index 0000000..5373832
--- /dev/null
+++ b/test/inpush1c.in
@@ -0,0 +1,2 @@
+e;
+f;
diff --git a/test/island.lm b/test/island.lm
new file mode 100644
index 0000000..8515eb4
--- /dev/null
+++ b/test/island.lm
@@ -0,0 +1,85 @@
+##### LM #####
+
+lex
+ token func_chr /[^{}]+/
+ token func_open /'{'/
+ token func_close /'}'/
+end
+
+def func_item
+ [func_chr]
+| [func_open func_body func_close]
+
+def func_body
+ [func_item*]
+
+def func
+ [ident `( `) `{ func_body func_close ]
+
+lex
+ token ident /[a-zA-Z_]+/
+ token number /[0-9]+/
+
+ rl s_string / "'" ([^'\\\n] | '\\' any )* "'" /
+ rl d_string / '"' ([^"\\\n] | '\\' any )* '"' /
+ token string /s_string | d_string/
+
+ literal `+ `* `; `( `) `{ `}
+
+ ignore wp / [ \t\n]+ /
+end
+
+def class_item
+ [func]
+| [class]
+| [ident `;]
+| [number `;]
+| [string `;]
+
+def class_body
+ [class_item*]
+
+def class
+ [ident `{ class_body `} ]
+
+def top_item
+ [func]
+| [class]
+
+def start
+ [top_item*]
+
+parse S: start[ stdin ]
+print_xml( S )
+print( '\n' )
+
+#pattern start
+# ~class { func() { func() { 1+{2}} } } func() {{a}}
+##### IN #####
+class
+{
+ 1;
+ "string";
+ foo;
+ func()
+ {
+ func()
+ {
+ 1+{2}
+ }
+ }
+}
+
+func()
+{
+ "data"
+ {a}
+}
+##### EXP #####
+<start><_repeat_top_item><top_item><class><ident>class</ident><_literal_0017>{</_literal_0017><class_body><_repeat_class_item><class_item><number>1</number><_literal_0011>;</_literal_0011></class_item><class_item><string>"string"</string><_literal_0011>;</_literal_0011></class_item><class_item><ident>foo</ident><_literal_0011>;</_literal_0011></class_item><class_item><func><ident>func</ident><_literal_0013>(</_literal_0013><_literal_0015>)</_literal_0015><_literal_0017>{</_literal_0017><func_body><_repeat_func_item><func_item><func_chr>func()
+ </func_chr></func_item><func_item><func_open>{</func_open><func_body><_repeat_func_item><func_item><func_chr>
+ 1+</func_chr></func_item><func_item><func_open>{</func_open><func_body><_repeat_func_item><func_item><func_chr>2</func_chr></func_item></_repeat_func_item></func_body><func_close>}</func_close></func_item><func_item><func_chr>
+ </func_chr></func_item></_repeat_func_item></func_body><func_close>}</func_close></func_item><func_item><func_chr>
+ </func_chr></func_item></_repeat_func_item></func_body><func_close>}</func_close></func></class_item></_repeat_class_item></class_body><_literal_0019>}</_literal_0019></class></top_item><top_item><func><ident>func</ident><_literal_0013>(</_literal_0013><_literal_0015>)</_literal_0015><_literal_0017>{</_literal_0017><func_body><_repeat_func_item><func_item><func_chr>"data"
+ </func_chr></func_item><func_item><func_open>{</func_open><func_body><_repeat_func_item><func_item><func_chr>a</func_chr></func_item></_repeat_func_item></func_body><func_close>}</func_close></func_item><func_item><func_chr>
+</func_chr></func_item></_repeat_func_item></func_body><func_close>}</func_close></func></top_item></_repeat_top_item></start>
diff --git a/test/lhs1.lm b/test/lhs1.lm
new file mode 100644
index 0000000..f40297f
--- /dev/null
+++ b/test/lhs1.lm
@@ -0,0 +1,42 @@
+##### LM #####
+
+lex
+ ignore /space+/
+ literal `* `( `) `!
+ token SEMI_NL /';\n'/
+ token id /[a-zA-Z_0-9]+/
+end
+
+def item
+ [id]
+ {
+ lhs = cons item ["( " ^r1 " )"]
+ }
+| [`( item* `)]
+ {
+ lhs = cons item ["( " ^r2 " )"]
+ }
+
+def A
+ [] {
+ print( 'A\n' )
+ }
+
+def B
+ [] {
+ print( 'B\n' )
+ }
+
+def start
+ [A item* `!]
+| [B item* SEMI_NL]
+
+parse Start: start[ stdin ]
+print( Start "\n" )
+##### IN #####
+a b c ( d1 d2 ) e f g ;
+##### EXP #####
+A
+B
+( a )( b )( c )( ( d1 )( d2 ) )( e )( f )( g );
+
diff --git a/test/liftattrs.lm b/test/liftattrs.lm
new file mode 100644
index 0000000..574ea35
--- /dev/null
+++ b/test/liftattrs.lm
@@ -0,0 +1,83 @@
+##### LM #####
+
+#
+# Regular Definitions
+#
+rl rl_ws /[ \t\n\r\v]+/
+rl rl_id /[a-zA-Z_][a-zA-Z0-9_]*/
+
+#
+# Tokens
+#
+
+lex
+ literal `= `< `> `/
+
+ # Ignore whitespace.
+ ignore /rl_ws/
+
+ # Open and close id
+ token id /rl_id/
+end
+
+#
+# Productions
+#
+
+def attr [id `= id]
+
+def attr_list
+ [attr_list attr]
+| []
+
+def open_tag
+ [`< id attr_list `>]
+
+def close_tag
+ [`< `/ id `>]
+
+def tag
+ [open_tag item_list close_tag]
+
+def item_list
+ [item_list tag]
+| []
+
+parse ILP: item_list[stdin]
+IL: item_list = ILP
+
+# Get the item list
+match IL [RootItemList: item_list]
+
+# List for collecting the attrs we pull out.
+CollectedAttrs: attr_list = construct attr_list []
+
+# Iterate through all attributes
+for AttrListIter:attr_list in RootItemList {
+ # If the name of the attr is foo, remove it.
+ if match AttrListIter
+ [SubAttrList:attr_list "foo=" Val:id]
+ {
+ # Remove the attribute
+ AttrListIter = construct attr_list
+ [SubAttrList]
+
+ # Add it to the colection
+ CollectedAttrs = construct attr_list
+ [CollectedAttrs " foo=" ^Val]
+ }
+}
+
+# Reconstruct the left hand side with the
+IL = construct item_list
+ ["<wrapper " ^CollectedAttrs ">" ^RootItemList "</wrapper>"]
+
+print( ^IL '\n' )
+##### IN #####
+<t1 a=b foo=bar1 c=d>
+ <t2 foo=bar2 e=f></t2>
+</t1>
+##### EXP #####
+<wrapper foo=bar1 foo=bar2><t1 a=b c=d>
+ <t2 e=f></t2>
+</t1></wrapper>
diff --git a/test/literal1.lm b/test/literal1.lm
new file mode 100644
index 0000000..b18c2b7
--- /dev/null
+++ b/test/literal1.lm
@@ -0,0 +1,4 @@
+##### LM #####
+print( '\hello\tworld\n' )
+##### EXP #####
+hello world
diff --git a/test/lookup1.lm b/test/lookup1.lm
new file mode 100644
index 0000000..7eaf58f
--- /dev/null
+++ b/test/lookup1.lm
@@ -0,0 +1,2416 @@
+##### LM #####
+context lookup
+ #
+ # Data types for global data.
+ #
+
+ # Language objects.
+ def lang_object
+ typeId: int
+ name: str
+
+ # If the object is a typedef, this points to the real object.
+ typedefOf: ptr<lang_object>
+
+ objectMap: map<str list<ptr<lang_object>>>
+ inherited: list<ptr<lang_object>>
+ lookupParent: ptr<lang_object>
+ specializationOf: ptr<lang_object>
+ []
+
+ # This structure is used to keep track of information necessary to make a
+ # declaration. While parsing a declaration it records the declaration's
+ # attributes.
+ def declaration_data
+ isTypedef: int
+ isFriend: int
+ isTemplate: int
+
+ typeObj: ptr<lang_object>
+ []
+
+ def declarator_data
+ qualObj: ptr<lang_object>
+ pdcScope: ptr<lang_object>
+ lookupObj: ptr<lang_object>
+ []
+
+ # Constants for language object types.
+ NamespaceType: int
+ ClassType: int
+ TemplateClassType: int
+ EnumType: int
+ IdType: int
+ TypedefType: int
+ TemplateIdType: int
+
+ #
+ # Global data declarations
+ #
+
+ # Object stacks.
+ curNamespace: list<ptr<lang_object>>
+ declNs: list<ptr<lang_object>>
+ lookupNs: list<ptr<lang_object>>
+ qualNs: list<ptr<lang_object>>
+ templateParamNs: list<ptr<lang_object>>
+
+ # Declaration, declarator data.
+ declarationData: list<declaration_data>
+ declaratorData: list<declarator_data>
+
+ # Template declarations
+ templDecl: list<int>
+
+ # Root namespace object
+ rootNamespace: ptr<lang_object>
+
+ #
+ # Identifier lookup.
+ #
+
+ # Lookup the token in the members of an object.
+ ptr<lang_object> lookupInObject( obj: ptr<lang_object> name: str )
+ {
+ # LOG print( ' looking in ', obj->name, '\n' )
+
+ ol: list<ptr<lang_object>> = obj->objectMap.find( name )
+ if ol {
+ # LOG print( ' * found an object: ', ol.head, '\n' )
+ return ol.head
+ }
+
+ return nil
+ }
+
+ # Lookup in an object and all the objects beneath it in the inheritance
+ # tree.
+ ptr<lang_object> lookupWithInheritance( obj: ptr<lang_object> name: str )
+ {
+ found: ptr<lang_object> = lookupInObject( obj name )
+ if found
+ return found
+
+ localObjInherited: list<ptr<lang_object>> = obj->inherited
+ for II: ptr<lang_object> in localObjInherited {
+ inh: ptr<lang_object> = II
+
+ # First check if the inherited object is the one we are after.
+ if inh->name == name && inh->typeId == ClassType {
+ # LOG print( ' * found a class name\n' )
+ return inh
+ }
+
+ # Otherwise look inside the inherited object.
+ found = lookupWithInheritance( inh name )
+ if found
+ return found
+ }
+
+ return nil
+ }
+
+ ptr<lang_object> unqualifiedLookup( name: str )
+ {
+ found: ptr<lang_object>
+
+ # Start with the objects in the templateParamNs.
+ localTemplateParamNs: list<ptr<lang_object>> = templateParamNs
+ for TemplParaObjIter: ptr<lang_object> in rev_child(localTemplateParamNs) {
+ found = lookupWithInheritance( TemplParaObjIter name )
+ if found
+ break
+ }
+
+ if !found {
+ # Iterator over the objects starting at the head of the lookup stack
+ # and going up through the lookup parents.
+ lookupIn: ptr<lang_object> = lookupNs.top
+ while lookupIn {
+ found = lookupWithInheritance( lookupIn name )
+ if found
+ break
+ lookupIn = lookupIn->lookupParent
+ }
+ }
+
+ return found
+ }
+
+ # The C++ scanner.
+ lex
+ rl fract_const / digit* '.' digit+ | digit+ '.' /
+ rl exponent / [eE] [+\-]? digit+ /
+ rl float_suffix / [flFL] /
+
+ # Single and double literals.
+ token TK_SingleLit /( 'L'? "'" ( [^'\\\n] | '\\' any )* "'" )/
+ token TK_DoubleLit /( 'L'? '"' ( [^"\\\n] | '\\' any )* '"' )/
+
+ literal `extern `namespace `friend `typedef `auto `register
+ `static `mutable `inline `virtual `explicit `const
+ `volatile `restrict `class `struct `union `template
+ `private `protected `public `using `void `char
+ `wchar_t `bool `int `float `double `short `long
+ `signed `unsigned `enum `new `delete `operator
+ `typename `export `throw `try `catch `sizeof
+ `dynamic_cast `static_cast `reinterpret_cast `const_cast
+ `typeid `this `true `false `switch `case `default
+ `if `else `while `do `for `break `continue
+ `return `goto
+
+ # Extensions
+ literal `__typeof `__is_pod `__is_empty
+
+ literal `{ `} `; `, `= `( `) `: `& `* `[ `] `~ `+ `-
+ `/ `< `> `| `^ `% `! `? `.
+
+ literal `:: `== `!= `&& `|| `*= `/= `%= `+= `-= `&=
+ `^= `|= `++ `-- `-> `->* `.* `... `<<= `>>=
+
+ # Token translation targets.
+ def unknown_id [lookup_id]
+ def class_id [lookup_id]
+ def namespace_id [lookup_id]
+ def templ_class_id [lookup_id]
+ def enum_id [lookup_id]
+ def typedef_id [lookup_id]
+ def identifier [lookup_id]
+ def template_id [lookup_id]
+
+ # Identifiers
+ token lookup_id
+ obj: ptr<lang_object>
+ qualObj: ptr<lang_object>
+
+ /( [a-zA-Z_] [a-zA-Z0-9_]* )/
+ {
+ name: str = match_text
+ found: ptr<lang_object> = nil
+ qualObj: ptr<lang_object> = nil
+ if qualNs.top {
+ # LOG print( 'qualified lookup of ', name, '\n' )
+
+ # Transfer the qualification to the token and reset it.
+ qualObj = qualNs.top
+ qualNs.top = nil
+
+ # Lookup using the qualification.
+ found = lookupWithInheritance( qualObj name )
+ }
+ else {
+ # No qualification, full search.
+ # LOG print( 'unqualified lookup of ', name, '\n' )
+ found = unqualifiedLookup( name )
+ }
+
+ # If no match, return an Unknown ID
+ id: int = typeid<unknown_id>
+ if found
+ id = found->typeId
+
+ LookupId: any = make_token( typeid<lookup_id>
+ input.pull(match_length) found qualObj )
+ input.push( make_tree( id LookupId ) )
+
+ }
+
+ # Floats.
+ token TK_Float /( fract_const exponent? float_suffix? |
+ digit+ exponent float_suffix? )/
+
+ # Integer decimal. Leading part buffered by float.
+ token TK_IntegerDecimal /( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} )/
+
+ # Integer octal. Leading part buffered by float.
+ token TK_IntegerOctal /( '0' [0-9]+ [ulUL]{0,2} )/
+
+ # Integer hex. Leading 0 buffered by float.
+ token TK_IntegerHex /( '0x' [0-9a-fA-F]+ [ulUL]{0,2} )/
+
+ # Preprocessor line.
+ ignore /'#' [^\n]* '\n'/
+
+ # Comments and whitespace.
+ ignore /( '/*' (any | '\n')* :>> '*/' )/
+ ignore /( '//' any* :> '\n' )/
+ ignore /( any - 33..126 )+/
+ end
+
+ #
+ # Support functions
+ #
+
+ typeId: int
+ name: str
+
+ # If the object is a typedef, this points to the real object.
+ typedefOf: ptr<lang_object>
+
+ objectMap: map<str list<ptr<lang_object>>>
+ inherited: list<ptr<lang_object>>
+ lookupParent: ptr<lang_object>
+ specializationOf: ptr<lang_object>
+
+ ptr<lang_object> createLangObject( typeId: int name: str lookupParent: ptr<lang_object> )
+ {
+ obj: ptr<lang_object> = new construct lang_object(
+ typeId
+ name
+ nil
+ construct map<str list<ptr<lang_object>>> []
+ construct list<ptr<lang_object>> []
+ lookupParent ) []
+ return obj
+ }
+
+ # Building the language object tree.
+ int insertObject( definedIn: ptr<lang_object> name: str obj: ptr<lang_object> )
+ {
+ ol: list<ptr<lang_object>> = definedIn->objectMap.find( name )
+ if !ol {
+ # Element not in the map already
+ ol = construct list<ptr<lang_object>> []
+ }
+ ol.append( obj )
+ definedIn->objectMap.store( name ol )
+ }
+
+ ptr<lang_object> findClass( inObj: ptr<lang_object>name: str )
+ {
+ ol: list<ptr<lang_object>> = inObj->objectMap.find( name )
+ if ol {
+ for ObjIter: ptr<lang_object> in ol {
+ obj: ptr<lang_object> = ObjIter
+ if obj->typeId == ClassType {
+ return obj
+ }
+ }
+ }
+ return nil
+ }
+
+ ptr<lang_object> findTemplateClass( inObj: ptr<lang_object> name: str )
+ {
+ ol: list<ptr<lang_object>> = inObj->objectMap.find( name )
+ if ol {
+ for ObjIter: ptr<lang_object> in ol {
+ obj: ptr<lang_object> = ObjIter
+ if obj->typeId == TemplateClassType
+ return obj
+ }
+ }
+ return nil
+ }
+
+ def root_qual_opt
+ []
+ | [`::]
+
+ def nested_name_specifier_opt
+ [nested_name_specifier_opt qualifying_name `:: designated_qualifying_name `::]
+ | [nested_name_specifier_opt qualifying_name `::]
+ | []
+
+ def nested_name_specifier
+ [nested_name_specifier designated_qualifying_name `::]
+ | [nested_name_specifier qualifying_name `::]
+ | [qualifying_name `::]
+
+ def qualifying_name
+ [class_name]
+ {
+ qualNs.top = r1.lookupId.obj
+ }
+
+ | [namespace_id]
+ {
+ match r1 [Id: lookup_id]
+ qualNs.top = Id.obj
+ }
+
+ | [typedef_id]
+ {
+ match r1 [Id: lookup_id]
+ qualNs.top = Id.obj->typedefOf
+ }
+
+ def designated_qualifying_name
+ [`template any_id]
+ {
+ # FIXME: nulling qualNs is not the right thing to do here.
+ qualNs.top = nil
+ }
+
+ | [`template any_id
+ templ_arg_open template_argument_list_opt templ_arg_close]
+ {
+ # FIXME: nulling qualNs is not the right thing to do here.
+ qualNs.top = nil
+ }
+
+ #
+ # Id Expression
+ #
+
+ def id_expression
+ lookupId: lookup_id
+
+ [root_qual_opt nested_name_specifier_opt unknown_id]
+ {
+ lhs.lookupId = lookup_id in r3
+ }
+
+ | [root_qual_opt nested_name_specifier_opt identifier]
+ {
+ lhs.lookupId = lookup_id in r3
+ }
+
+ | [root_qual_opt nested_name_specifier_opt operator_function_id]
+ {
+ # Normally the token translation transfers the qualification. Since
+ # the operator_function_id does not end in a lookup we must do it ourselves.
+ qualObj: ptr<lang_object> = qualNs.top
+ qualNs.top = nil
+
+ lhs.lookupId = construct lookup_id ["x"]
+ lhs.lookupId.data = '<operator_function_id>'
+ lhs.lookupId.qualObj = qualObj
+ }
+
+ | [root_qual_opt nested_name_specifier_opt conversion_function_id]
+ {
+ # Normally the token translation transfers the qualification. Since
+ # the operator_function_id does not } in a lookup we must do it ourselves.
+ qualObj: ptr<lang_object> = qualNs.top
+ qualNs.top = nil
+
+ # Do we need qual reset here becauase operator_function_id does not do it?
+ lhs.lookupId = construct lookup_id ["x"]
+ lhs.lookupId.data = '<conversion_function_id>'
+ lhs.lookupId.qualObj = qualObj
+ }
+
+ | [root_qual_opt nested_name_specifier_opt `~ class_name]
+ {
+ lhs.lookupId = r4.lookupId
+ }
+
+ | [root_qual_opt nested_name_specifier_opt template_name]
+ {
+ lhs.lookupId = r3.lookupId
+ }
+
+ def template_name
+ lookupId: lookup_id
+
+ [template_id templ_arg_open template_argument_list_opt templ_arg_close]
+ {
+ lhs.lookupId = lookup_id in r1
+ }
+
+ | [template_id]
+ {
+ lhs.lookupId = lookup_id in r1
+ }
+
+
+ #
+ # Class Names
+ #
+
+ def class_name
+ lookupId: lookup_id
+
+ [class_id]
+ {
+ lhs.lookupId = lookup_id in r1
+ }
+
+ | [templ_class_id]
+ {
+ lhs.lookupId = lookup_id in r1
+ }
+
+ | [templ_class_id templ_arg_open template_argument_list_opt templ_arg_close]
+ {
+ # TODO: Look for a specialization.
+ lhs.lookupId = lookup_id in r1
+ }
+
+ def templ_arg_open
+ [`<]
+ {
+ qualNs.push( nil )
+ }
+
+ def templ_arg_close
+ [`>]
+ {
+ qualNs.pop()
+ }
+
+ def declaration
+ [block_declaration] commit
+ | [function_definition] commit
+ | [template_declaration] commit
+ | [explicit_instantiation] commit
+ | [explicit_specialization] commit
+ | [linkage_specification] commit
+ | [namespace_definition] commit
+
+ #
+ # Declarations
+ #
+
+ def block_declaration
+ [simple_declaration]
+ | [using_declaration]
+ | [using_directive]
+
+ def simple_declaration
+ [declaration_start simple_declaration_forms declaration_end `;]
+
+ # Ordering is important for optimization. The form with the optional
+ # decl_specifier_sing should go second.
+ def simple_declaration_forms
+ [decl_specifier_mult_seq_opt decl_specifier_sing
+ decl_specifier_mult_seq_opt init_declarator_list_opt]
+
+ | [decl_specifier_mult_seq_opt init_declarator_list_opt]
+
+ def declaration_start
+ []
+ {
+ # LOG print( 'opening new declaration_data with templDecl: ', templDecl.top, '\n' )
+ declarationData.push( construct declaration_data ( 0 0 0 ) [] )
+
+ # Transfer the template flag and reset it.
+ declarationData.top.isTemplate = templDecl.top
+ templDecl.push( 0 )
+ }
+
+ def declaration_end
+ []
+ {
+ # LOG print( 'closing declaration_data\n' )
+ declarationData.pop()
+ templDecl.pop()
+ }
+
+ def decl_specifier_sing
+ [type_specifier_sing]
+ {
+ # Store the object type of the declaration (if any) for use
+ # by typedefs.
+ declarationData.top.typeObj = r1.lookupId.obj
+ }
+
+ def type_specifier_seq
+ lookupId: lookup_id
+
+ [type_specifier_mult_seq_opt type_specifier_sing type_specifier_mult_seq_opt]
+ {
+ lhs.lookupId = r2.lookupId
+ }
+
+ def type_specifier_sing
+ lookupId: lookup_id
+
+ [simple_type_specifier]
+ {
+ lhs.lookupId = r1.lookupId
+ }
+
+ | [class_specifier]
+ {
+ lhs.lookupId = construct lookup_id ["x"]
+ lhs.lookupId.data = '<class_specifier>'
+ }
+
+ | [enum_specifier]
+ {
+ lhs.lookupId = construct lookup_id ["x"]
+ lhs.lookupId.data = '<enum_specifier>'
+ }
+
+ | [elaborated_type_specifier]
+ {
+ lhs.lookupId = construct lookup_id ["x"]
+ lhs.lookupId.data = '<elaborated_type_specifier>'
+ }
+
+ # Type specifier sequence without enum specifier or class specifier.
+ def necs_type_specifier_seq
+ [type_specifier_mult_seq_opt necs_type_specifier_sing type_specifier_mult_seq_opt]
+
+ # Type specifier singular without enum specifier or class specifier.
+ def necs_type_specifier_sing
+ [simple_type_specifier]
+ | [elaborated_type_specifier]
+
+ def type_specifier_mult_seq_opt
+ [type_specifier_mult_seq_opt type_specifier_mult]
+ | []
+
+ def type_specifier_mult_seq
+ [type_specifier_mult_seq type_specifier_mult]
+ | [type_specifier_mult]
+
+ def simple_type_specifier
+ lookupId: lookup_id
+
+ [simple_type_specifier_name]
+ {
+ lhs.lookupId = r1.lookupId
+ }
+
+ | [simple_type_specifier_kw_seq]
+ {
+ lhs.lookupId = construct lookup_id ["x"]
+ lhs.lookupId.data = '<simple_type_specifier_kw_seq>'
+ }
+
+ | [`typename root_qual_opt nested_name_specifier type_name]
+ {
+ lhs.lookupId = r4.lookupId
+ }
+
+ | [`typename root_qual_opt nested_name_specifier identifier]
+ {
+ lhs.lookupId = lookup_id in r4
+ }
+
+ | [`typename root_qual_opt nested_name_specifier unknown_id]
+ {
+ lhs.lookupId = lookup_id in r4
+ }
+
+ # Extension.
+ | [`__typeof `( expression `)]
+ {
+ lhs.lookupId = construct lookup_id ["x"]
+ lhs.lookupId.data = '<simple_type_specifier_kw_seq>'
+ }
+
+ def simple_type_specifier_name
+ lookupId: lookup_id
+
+ [qual_type_name]
+ {
+ lhs.lookupId = r1.lookupId
+ }
+
+ def simple_type_specifier_kw_seq
+ [simple_type_specifier_kw_seq simple_type_specifier_kw]
+ | [simple_type_specifier_kw]
+
+ def simple_type_specifier_kw
+ [`void]
+ | [`char]
+ | [`wchar_t]
+ | [`bool]
+ | [`int]
+ | [`float]
+ | [`double]
+ | [`short]
+ | [`long]
+ | [`signed]
+ | [`unsigned]
+
+ def qual_type_name
+ lookupId: lookup_id
+
+ [root_qual_opt nested_name_specifier_opt type_name]
+ {
+ lhs.lookupId = r3.lookupId
+ }
+
+ def type_name
+ lookupId: lookup_id
+
+ [class_name]
+ {
+ lhs.lookupId = r1.lookupId
+ }
+
+ | [enum_id]
+ {
+ lhs.lookupId = lookup_id in r1
+ }
+
+ | [typedef_id]
+ {
+ lhs.lookupId = lookup_id in r1
+ }
+
+ # NOTE: the typename case is moved to simple type specifier
+ # to take advantage of its conflict resolution.
+ def elaborated_type_specifier
+ [class_key nested_name_specifier_opt class_head_name]
+ {
+ Id: lookup_id = lookup_id in r3
+ name: str = Id.data
+
+ # Get the ns the class is declared in.
+ parentObj: ptr<lang_object> = declNs.top
+ if Id.qualObj
+ parentObj = Id.qualObj
+
+ # Look for the class in the given scope.
+ declaredClass: ptr<lang_object> = findClass( parentObj name )
+ if !declaredClass
+ declaredClass = findTemplateClass( parentObj name )
+
+ if !declaredClass {
+ # LOG print( 'creating new class: ', name, '\n' )
+
+ # Class does not exist in the parent scope, create it.
+ nsType: int = declaredClassType()
+
+ declaredClass = createLangObject( nsType name lookupNs.top )
+
+ # FIXME: handle friends. Make the class visible only if we are NOT
+ # in a friend declaration. The new class object is necessary to
+ # properly process the body of the class.
+ if declarationData.top.isFriend == 0
+ insertObject( parentObj name declaredClass )
+ }
+ }
+
+ # TODO: Lookup type specialization.
+ | [class_key nested_name_specifier_opt templ_class_id
+ templ_arg_open template_argument_list_opt templ_arg_close]
+
+ | [`enum nested_name_specifier_opt enum_head_name]
+ {
+ # TODO: should look for existing enums of the same name.
+ Id: lookup_id = lookup_id in r3
+ # LOG print( 'creating enumeration ' Id.data '\n' )
+ enum: ptr<lang_object> = createLangObject( EnumType Id.data lookupNs.top )
+ insertObject( declNs.top Id.data enum )
+ }
+
+ def decl_specifier_mult_seq_opt
+ [decl_specifier_mult_seq_opt decl_specifier_mult]
+ | []
+
+ def decl_specifier_mult_seq
+ [decl_specifier_mult_seq decl_specifier_mult]
+ | [decl_specifier_mult]
+
+ def decl_specifier_mult
+ [type_specifier_mult]
+ | [storage_class_specifier]
+ | [function_specifier]
+
+ | [`friend]
+ {
+ declarationData.top.isFriend = 1
+ }
+
+ | [`typedef]
+ {
+ declarationData.top.isTypedef = 1
+ }
+
+ def storage_class_specifier
+ [`auto]
+ | [`register]
+ | [`static]
+ | [`extern]
+ | [`mutable]
+
+ def function_specifier
+ [`inline]
+ | [`virtual]
+ | [`explicit]
+
+ def type_specifier_mult
+ [cv_qualifier]
+
+ def cv_qualifier
+ [`const]
+ | [`volatile]
+ | [`restrict]
+
+ def cv_qualifier_rep
+ [cv_qualifier_rep cv_qualifier]
+ | []
+
+ def namespace_definition
+ [named_namespace_definition]
+ | [unnamed_namespace_definition]
+
+ def named_namespace_definition
+ [original_namespace_definition]
+ | [extension_namespace_definition]
+
+ #
+ # Enumerations
+ #
+
+ def enum_specifier
+ [`enum nested_name_specifier_opt
+ enum_head_name `{ enumerator_list_opt `}]
+ {
+ # TODO: should look for existing enums of the same name.
+ Id: lookup_id = lookup_id in r3
+ # LOG print( 'creating enumeration ' Id.data '\n' )
+ enum: ptr<lang_object> = createLangObject( EnumType Id.data lookupNs.top )
+ insertObject( declNs.top Id.data enum )
+ }
+
+ | [`enum `{ enumerator_list_opt `}]
+
+ def enum_head_name
+ [class_id]
+ | [templ_class_id]
+ | [namespace_id]
+ | [typedef_id]
+ | [enum_id]
+ | [identifier]
+ | [template_id]
+ | [unknown_id]
+
+ def enumerator_list_opt
+ [enumerator_list]
+ | [enumerator_list `,]
+ | []
+
+ def enumerator_list
+ [enumerator_list `, enumerator_definition]
+ | [enumerator_definition]
+
+ def enumerator_definition
+ [enumerator_id]
+ {
+ Id: lookup_id = lookup_id in r1
+ enumId: ptr<lang_object> = createLangObject( IdType Id.data lookupNs.top )
+ insertObject( declNs.top Id.data enumId )
+ }
+
+ | [enumerator_id `= constant_expression]
+ {
+ Id: lookup_id = lookup_id in r1
+ enumId: ptr<lang_object> = createLangObject( IdType Id.data lookupNs.top )
+ insertObject( declNs.top Id.data enumId )
+ }
+
+ def enumerator_id
+ [namespace_id]
+ | [typedef_id]
+ | [enum_id]
+ | [class_id]
+ | [templ_class_id]
+ | [template_id]
+ | [identifier]
+ | [unknown_id]
+
+ #
+ # Declarators
+ #
+
+ def init_declarator_list_opt
+ [init_declarator_list]
+ | []
+
+ def init_declarator_list
+ [init_declarator_list `, init_declarator]
+ | [init_declarator]
+
+ def init_declarator
+ [declarator initializer_opt]
+
+ def initializer_opt
+ [`= initializer_clause]
+ | [`( expression `)]
+ | []
+
+ def initializer_clause
+ [assignment_expression]
+ | [`{ initializer_list `}]
+ | [`{ initializer_list `, `}]
+ | [`{ `}]
+
+ def initializer_list
+ [initializer_list `, initializer_clause]
+ | [initializer_clause]
+
+ #
+ # Expressions
+ #
+
+ def expression
+ [expression `, assignment_expression]
+ | [assignment_expression]
+
+ def expression_opt
+ [expression]
+ | []
+
+ def constant_expression
+ [conditional_expression]
+
+ def constant_expression_opt
+ [constant_expression]
+ | []
+
+ def assignment_expression
+ [conditional_expression]
+ | [logical_or_expression assignment_op assignment_expression]
+ | [throw_expression]
+
+ def assignment_op
+ [`=]
+ | [`*=]
+ | [`/=]
+ | [`%=]
+ | [`+=]
+ | [`-=]
+ | [`>>=]
+ | [`<<=]
+ | [`&=]
+ | [`^=]
+ | [`|=]
+
+ def conditional_expression
+ [logical_or_expression]
+ | [logical_or_expression `? expression `: assignment_expression]
+
+ def logical_or_expression
+ [logical_or_expression `|| logical_and_expression]
+ | [logical_and_expression]
+
+ def logical_and_expression
+ [logical_and_expression `&& inclusive_or_expression]
+ | [inclusive_or_expression]
+
+ def inclusive_or_expression
+ [inclusive_or_expression `| exclusive_or_expression]
+ | [exclusive_or_expression]
+
+ def exclusive_or_expression
+ [exclusive_or_expression `^ and_expression]
+ | [and_expression]
+
+ def and_expression
+ [and_expression `& equality_expression]
+ | [equality_expression]
+
+ def equality_expression
+ [equality_expression `== relational_expression]
+ | [equality_expression `!= relational_expression]
+ | [relational_expression]
+
+ def relational_expression
+ [relational_expression `< shift_expression]
+ | [relational_expression `> shift_expression]
+ | [relational_expression lt_eq shift_expression]
+ | [relational_expression gt_eq shift_expression]
+ | [shift_expression]
+
+ def shift_expression
+ [shift_expression shift_left additive_expression]
+ | [shift_expression shift_right additive_expression]
+ | [additive_expression]
+
+ def additive_expression
+ [additive_expression `+ multiplicative_expression]
+ | [additive_expression `- multiplicative_expression]
+ | [multiplicative_expression]
+
+ def multiplicative_expression
+ [multiplicative_expression `* pm_expression]
+ | [multiplicative_expression `/ pm_expression]
+ | [multiplicative_expression `% pm_expression]
+ | [pm_expression]
+
+ def pm_expression
+ [pm_expression `->* cast_expression]
+ | [pm_expression `.* cast_expression]
+ | [cast_expression]
+
+ def cast_expression
+ [unary_expression]
+ | [`( type_id `) cast_expression]
+
+ def delete_expression
+ [root_qual_opt `delete cast_expression]
+ | [root_qual_opt `delete `[ `] cast_expression]
+
+ def new_initializer_opt
+ [new_initializer]
+ | []
+
+ def new_initializer
+ [`( expression_opt `)]
+
+ def direct_new_declarator
+ [`[ expression `]]
+ | [direct_new_declarator `[ constant_expression `]]
+
+ def new_declarator_opt
+ [new_declarator]
+ | []
+
+ def new_declarator
+ [direct_new_declarator]
+ | [ptr_operator_seq direct_new_declarator]
+ | [ptr_operator_seq]
+
+ def new_type_id
+ [necs_type_specifier_seq new_declarator_opt]
+
+ def new_placement
+ [`( expression `)]
+
+ def new_expression
+ [root_qual_opt `new new_type_id new_initializer_opt]
+ | [root_qual_opt `new new_placement new_type_id new_initializer_opt]
+ | [root_qual_opt `new `( type_id `) new_initializer_opt]
+ | [root_qual_opt `new new_placement `( type_id `) new_initializer_opt]
+
+ def unary_operator
+ [`*]
+ | [`&]
+ | [`+]
+ | [`-]
+ | [`!]
+ | [`~]
+
+ def unary_expression
+ [postfix_expression]
+ | [`++ cast_expression]
+ | [`-- cast_expression]
+ | [unary_operator cast_expression]
+ | [`sizeof `( type_id `)]
+ | [`sizeof unary_expression]
+ | [new_expression]
+ | [delete_expression]
+
+ def function_style_type_conv
+ [simple_type_specifier]
+
+
+ def postfix_expression
+ [primary_expression]
+ | [postfix_expression `[ expression `]]
+ | [postfix_expression `( expression_opt `)]
+ | [function_style_type_conv `( expression_opt `)]
+ | [member_request_expr dot_arrow id_expression]
+ | [member_request_expr dot_arrow pseudo_destructor_call]
+ | [postfix_expression `++]
+ | [postfix_expression `--]
+ | [`dynamic_cast templ_arg_open type_id templ_arg_close `( expression `)]
+ | [`static_cast templ_arg_open type_id templ_arg_close `( expression `)]
+ | [`reinterpret_cast templ_arg_open type_id templ_arg_close `( expression `)]
+ | [`const_cast templ_arg_open type_id templ_arg_close `( expression `)]
+ | [`typeid `( expression `)]
+ | [`typeid `( type_id `)]
+
+ def pseudo_destructor_call
+ [root_qual_opt nested_name_specifier_opt `~ pdc_type_name]
+
+ def primary_expression
+ [expr_lit]
+ | [`this]
+ | [`( expression `)]
+ | [id_expression]
+ # GNU extensions
+ | [`( `{ statement_rep `} `)]
+ | [`__is_pod `( type_id `)]
+ | [`__is_empty `( type_id `)]
+
+ def expr_lit
+ [TK_IntegerDecimal]
+ | [TK_IntegerOctal]
+ | [TK_IntegerHex]
+ | [TK_SingleLit]
+ | [TK_Float]
+ | [double_lit_list]
+ | [`true]
+ | [`false]
+
+ def double_lit_list
+ [TK_DoubleLit double_lit_list]
+ | [TK_DoubleLit]
+
+ def member_request_expr
+ [postfix_expression]
+ # {
+ # # FIXME: If no proper type is found, we must fail.
+ # # LOG print( 'setting member request scope\n' )
+ # # qualNs.set( $1->type != 0 ? $1->type->getObject() : 0 );
+ # }
+
+ def dot_arrow
+ [`->]
+ | [`.]
+
+ def pdc_type_name
+ [enum_id]
+ | [typedef_id]
+
+ #
+ # Statements
+ #
+
+ def statement_rep
+ [statement_rep statement]
+ | []
+
+ def statement
+ [declaration_statement]
+ | [labeled_statement]
+ | [expression_statement]
+ | [compound_statement]
+ | [selection_statement]
+ | [iteration_statement]
+ | [jump_statement]
+ | [try_block]
+
+ def labeled_statement
+ [label_id `: statement]
+ | [`case constant_expression `: statement]
+ | [`default `: statement]
+
+ def label_id
+ [unknown_id]
+ | [identifier]
+ | [class_id]
+ | [templ_class_id]
+ | [namespace_id]
+ | [typedef_id]
+ | [enum_id]
+ | [template_id]
+
+ def compound_statement
+ [`{ compound_begin statement_rep compound_end `}]
+
+ def compound_begin
+ []
+ {
+ newCompound: ptr<lang_object> = createLangObject( 0 '<compound_begin>' lookupNs.top )
+ lookupNs.push( newCompound )
+ declNs.push( newCompound )
+ # LOG print( 'opening <compound>\n' )
+ }
+
+ def compound_end
+ []
+ {
+ lookupNs.pop()
+ declNs.pop()
+ # LOG print( 'closing <compound>\n' )
+ }
+
+ def selection_statement
+ [`if `( condition `) statement elseif_clauses else_clause]
+ | [`switch `( condition `) statement]
+
+ def elseif_clauses
+ [elseif_clauses `else `if `( condition `) statement]
+ | []
+
+ def else_clause
+ [`else statement]
+ | []
+
+ def iteration_statement
+ [`while `( condition `) statement]
+ | [`do statement `while `( expression `) `;]
+ | [`for `( for_init_statement condition_opt `; expression_opt `) statement]
+
+ def jump_statement
+ [`break `;]
+ | [`continue `;]
+ | [`return expression_opt `;]
+ | [`goto any_id `;]
+
+ def any_id
+ [unknown_id]
+ | [class_id]
+ | [namespace_id]
+ | [templ_class_id]
+ | [enum_id]
+ | [typedef_id]
+ | [identifier]
+ | [template_id]
+
+
+ def for_init_statement
+ [expression_statement]
+ | [stmt_block_declaration_forms `;]
+
+ def condition
+ [expression]
+ | [type_specifier_seq declarator `= assignment_expression]
+
+ def condition_opt
+ [condition]
+ | []
+
+ def expression_statement
+ [expression `;]
+ | [`;]
+
+ def declaration_statement
+ [stmt_block_declaration]
+
+ def stmt_block_declaration
+ [declaration_start stmt_block_declaration_forms declaration_end `;]
+ | [using_declaration]
+ | [using_directive]
+
+ def stmt_block_declaration_forms
+ [decl_specifier_mult_seq_opt decl_specifier_sing decl_specifier_mult_seq_opt
+ init_declarator_list_opt]
+ | [decl_specifier_mult_seq init_declarator_list_opt]
+
+ #
+ # Declarators
+ #
+
+ def declarator
+ lookupObj: ptr<lang_object>
+
+ [ptr_operator_seq_opt declarator_id decl_array_or_param_rep declarator_end]
+ {
+ lhs.lookupObj = r4.lookupObj
+ }
+
+ | [ptr_operator_seq_opt `( sub_declarator `) decl_array_or_param_rep declarator_end]
+ {
+ lhs.lookupObj = r6.lookupObj
+ }
+
+ def sub_declarator
+ [ptr_operator_seq declarator_id decl_array_or_param_rep]
+ | [ptr_operator_seq `( sub_declarator `) decl_array_or_param_rep]
+ | [`( sub_declarator `) decl_array_or_param_rep]
+ | [declarator_id decl_array_or_param_rep]
+
+ def decl_array_or_param_rep
+ [decl_array_or_param_rep decl_array_or_param]
+ | []
+
+ def decl_array_or_param
+ [`[ constant_expression_opt `]]
+ | [`( parameter_declaration_clause `) cv_qualifier_rep exception_specification_opt]
+
+ def declarator_id
+ [declarator_id_forms]
+ {
+ name: str = r1.lookupId.data
+ qualObj: ptr<lang_object> = r1.lookupId.qualObj
+
+ parentObj: ptr<lang_object> = declNs.top
+ if qualObj {
+ parentObj = qualObj
+ }
+
+ # Decide if we are declaring a constructor/destructor.
+ isConstructor: bool
+ if parentObj == r1.lookupId.obj {
+ isConstructor = true
+ # LOG print( 'making declarator ' name ' a constructor/destructor\n' )
+ }
+
+ if parentObj->specializationOf &&
+ parentObj->specializationOf == r1.lookupId.obj
+ {
+ isConstructor = true
+ # LOG print( 'making declarator ' name ' a constructor/destructor\n' )
+ }
+
+ obj: ptr<lang_object> = nil
+ if name && !isConstructor && declarationData.top.isFriend == 0 {
+ if declarationData.top.isTypedef {
+ obj = createLangObject( TypedefType name lookupNs.top )
+ obj->typedefOf = declarationData.top.typeObj
+ insertObject( parentObj name obj )
+
+ # LOG print( 'making declarator ' name ' a typedef\n' )
+ }
+ else {
+ if !qualObj {
+ if declarationData.top.isTemplate {
+ # If in a template declaration and the name is not qualified then
+ # create the template id.
+ obj = createLangObject( TemplateIdType name lookupNs.top )
+ #object->objType = declarationData.top.type
+ insertObject( declNs.top name obj )
+
+ # LOG print( 'making declarator ' name ' a template id\n' )
+ }
+ else {
+ obj = createLangObject( IdType name lookupNs.top )
+ #object->objType = declarationData.top().type;
+ insertObject( declNs.top name obj )
+
+ # LOG print( 'making declarator ' name ' an id\n' )
+ }
+ }
+ }
+ }
+
+ declaratorData.push( construct declarator_data (
+ qualObj nil lookupNs.top ) [] )
+
+ # If the declarator is qualified, push the qualification to the lookup
+ # stack. Also save it in the declarator data so it can be passed to a
+ # function body if needed.
+ if qualObj {
+ lookupNs.push( qualObj )
+ declaratorData.top.lookupObj = qualObj
+ }
+
+ # LOG print( 'reduced declarator_id: ' name '\n' )
+ }
+
+ # Undoes the setup done by declarator_id and pdc_start.
+ def declarator_end
+ lookupObj: ptr<lang_object>
+
+ []
+ {
+ # Get the lookupObject from the scope and pass it up. If we are about to
+ # parse a function body it will be needed.
+ lhs.lookupObj = declaratorData.top.lookupObj
+
+ pdcScope: ptr<lang_object> = declaratorData.top.pdcScope
+ qualObj: ptr<lang_object> = declaratorData.top.qualObj
+
+ declaratorData.pop()
+
+ if pdcScope {
+ # LOG print( 'closing <pdc_scope>\n' )
+ lookupNs.pop()
+ declNs.pop()
+ }
+
+ if qualObj {
+ # LOG print( 'popping lookupNs\n' )
+ lookupNs.pop()
+ }
+ }
+
+ def declarator_id_forms
+ lookupId: lookup_id
+
+ [id_expression]
+ {
+ lhs.lookupId = r1.lookupId
+ }
+
+ | [root_qual_opt nested_name_specifier_opt type_name]
+ {
+ lhs.lookupId = r3.lookupId
+ }
+
+ | [root_qual_opt nested_name_specifier_opt `~ class_id]
+ {
+ lhs.lookupId = lookup_id in r4
+ }
+
+ | [root_qual_opt nested_name_specifier_opt `~ templ_class_id]
+ {
+ lhs.lookupId = lookup_id in r4
+ }
+ | [root_qual_opt nested_name_specifier_opt `~ unknown_id]
+ {
+ lhs.lookupId = lookup_id in r4
+ }
+
+ def type_id
+ lookupId: lookup_id
+
+ [type_specifier_seq abstract_declarator_opt]
+ {
+ lhs.lookupId = r1.lookupId
+ }
+
+ def abstract_declarator_opt
+ [abstract_declarator]
+ | []
+
+ def abstract_declarator
+ [ptr_operator_seq abstract_noid abstract_decl_array_or_param_seq_opt declarator_end]
+ | [ptr_operator_seq `( sub_abstract_declarator `)
+ abstract_decl_array_or_param_seq_opt declarator_end]
+ | [abstract_noid abstract_decl_array_or_param_seq declarator_end]
+ | [`( sub_abstract_declarator `) abstract_decl_array_or_param_seq_opt declarator_end]
+
+ def sub_abstract_declarator
+ [ptr_operator_seq abstract_noid abstract_decl_array_or_param_seq_opt]
+
+ | [ptr_operator_seq `( sub_abstract_declarator `)
+ abstract_decl_array_or_param_seq_opt]
+
+ | [`( sub_abstract_declarator `) abstract_decl_array_or_param_seq_opt]
+
+ def abstract_noid
+ []
+ {
+ # Make scope for declarator.
+ declaratorData.push( construct declarator_data [] )
+ }
+
+ def abstract_decl_array_or_param_seq_opt
+ [abstract_decl_array_or_param_seq_opt abstract_decl_array_or_param]
+ | []
+
+ def abstract_decl_array_or_param_seq
+ [abstract_decl_array_or_param_seq abstract_decl_array_or_param]
+ | [abstract_decl_array_or_param]
+
+ def abstract_decl_array_or_param
+ [`[ constant_expression_opt `]]
+ | [`( parameter_declaration_clause `) cv_qualifier_rep
+ exception_specification_opt]
+
+ def parameter_declaration_clause
+ [pdc_start parameter_declaration_list]
+ | [pdc_start parameter_declaration_list `...]
+ | [pdc_start parameter_declaration_list `, `...]
+ | [pdc_start `...]
+ | [pdc_start]
+
+ def pdc_start
+ []
+ {
+ if !declaratorData.top.pdcScope {
+ # We are going to need a scope for the declarator.
+ pdcScope: ptr<lang_object> = createLangObject( 0 '<pdc_scope>' lookupNs.top )
+ lookupNs.push( pdcScope )
+ declNs.push( pdcScope )
+
+ declaratorData.top.pdcScope = pdcScope
+ declaratorData.top.lookupObj = pdcScope
+ # LOG print( 'opening <pdc_scope>\n' )
+ }
+ }
+
+ def parameter_declaration_list
+ [parameter_declaration_list `, parameter_declaration]
+ | [parameter_declaration]
+
+ def parameter_declaration
+ [declaration_start parameter_declaration_forms declaration_end]
+
+ # Ordering the productions such that decl_specifier_sing is tried first is good
+ # for performance.
+ def parameter_declaration_forms
+ [decl_specifier_mult_seq_opt decl_specifier_sing decl_specifier_mult_seq_opt
+ param_maybe_declarator maybe_parameter_init]
+
+ | [decl_specifier_mult_seq param_maybe_declarator maybe_parameter_init]
+
+ def param_maybe_declarator
+ [abstract_declarator]
+ | [declarator]
+ | []
+
+ def maybe_parameter_init
+ [`= constant_expression]
+ | []
+
+ def ptr_operator
+ [`&]
+ | [root_qual_opt nested_name_specifier_opt `* cv_qualifier_rep]
+
+ def ptr_operator_seq
+ [ptr_operator_seq ptr_operator]
+ | [ptr_operator]
+
+ def ptr_operator_seq_opt
+ [ptr_operator_seq_opt ptr_operator]
+ | []
+
+ #
+ # Functions
+ #
+
+ def function_definition
+ [function_def_declaration ctor_initializer_opt function_body function_def_end]
+
+ def function_def_declaration
+ [declaration_start function_def_declaration_forms declaration_end]
+
+ def function_def_declaration_forms
+ [decl_specifier_mult_seq_opt decl_specifier_sing
+ decl_specifier_mult_seq_opt function_def_declarator]
+ | [decl_specifier_mult_seq function_def_declarator]
+ | [function_def_declarator]
+
+ def function_def_declarator
+ [declarator]
+ {
+ # The lookupObj from the declarator is the deepest lookup object found
+ # while parsing the declarator. Make it visible in the function body.
+ # This could be the args, the qualObj, or the parent to the function.
+ lookupNs.push( r1.lookupObj )
+ }
+
+ def function_def_end
+ []
+ {
+ # Pop the lookup object.
+ lookupNs.pop()
+ }
+
+ def function_body
+ [function_body_begin `{ statement_rep function_body_end `}]
+
+ def function_body_begin
+ []
+ {
+ newFunctionBody: ptr<lang_object> = createLangObject( 0
+ '<function_body_begin>' lookupNs.top )
+ lookupNs.push( newFunctionBody )
+ declNs.push( newFunctionBody )
+ templDecl.push( 0 )
+ # LOG print( 'opening <function_body>\n' )
+ }
+
+ def function_body_end
+ []
+ {
+ # First undoes the function body begin work. Then undoes the setup in
+ # function_def_declarator.
+ declNs.pop()
+ lookupNs.pop()
+ templDecl.pop()
+ # LOG print( 'closing <function_body>\n' )
+ }
+
+
+
+ #
+ # Classs
+ #
+
+ int declaredClassType()
+ {
+ if declarationData.top.isTemplate {
+ return TemplateClassType
+ } else {
+ return ClassType
+ }
+ }
+
+ def class_specifier
+ [class_head base_clause_opt `{ class_member_rep class_body_end `}]
+ {
+ # FIXME: reparse not implemented yet
+ # FIXME FIXME: reparse is actually implemented now implemented
+ # # Visit class function bodies, but skip nested classes.
+ # for CFB: class_function_body in lhs {
+ # skipping class_specifier
+ #
+ # # Reparse the text of the class function body as a function body
+ # function_body FB = parse function_body[ $CFB ]
+ #
+ # # Replace the class function body with the parsed function body.
+ # CFB = cons class_function_body [FB.tree]
+ # }
+ }
+
+ def class_head
+ [class_key]
+ {
+ nsType: int = declaredClassType()
+
+ # LOG print( 'creating new anonymous class\n' )
+ newClass: ptr<lang_object> = createLangObject( nsType
+ '<anon_class>' lookupNs.top )
+ lookupNs.push( newClass )
+ declNs.push( newClass )
+ }
+
+ | [class_key nested_name_specifier_opt class_head_name]
+ {
+ Id: lookup_id = lookup_id in r3
+ name: str = Id.data
+
+ # Get the ns the class is declared in.
+ parentObj: ptr<lang_object> = declNs.top
+ if Id.qualObj
+ parentObj = Id.qualObj
+
+ # Look for the class in the given scope.
+ declaredClass: ptr<lang_object> = findClass( parentObj name )
+ if !declaredClass
+ declaredClass = findTemplateClass( parentObj name )
+
+ if !declaredClass {
+ # LOG print( 'creating new class: ' name '\n' )
+
+ # Class does not exist in the parent scope, create it.
+ nsType: int = declaredClassType()
+
+ declaredClass = createLangObject( nsType name lookupNs.top )
+
+ # FIXME: handle friends. Make the class visible only if we are NOT
+ # in a friend declaration. The new class object is necessary to
+ # properly process the body of the class.
+ if declarationData.top.isFriend == 0
+ insertObject( parentObj name declaredClass )
+ }
+
+ # Push the found/new class.
+ lookupNs.push( declaredClass )
+ declNs.push( declaredClass )
+ }
+
+ | [class_key nested_name_specifier_opt templ_class_id
+ templ_arg_open template_argument_list_opt templ_arg_close]
+ {
+ match r3 [Id: lookup_id]
+ id: str = Id.data
+ classObj: ptr<lang_object> = Id.obj
+
+ # TODO: Try to find the specializaition in the template class object.
+ # TypeList typeList;
+ # makeTypeList( typeList $6->last );
+
+ declaredClass: ptr<lang_object>
+ #declaredClass = classObj->findSpecExact( typeList );
+ if !declaredClass {
+ # LOG print( 'making new template specialization\n' )
+ nsType: int = declaredClassType()
+ declaredClass = createLangObject( nsType id lookupNs.top )
+ # LOG print( 'declaredClass: ' declaredClass '\n' )
+ declaredClass->specializationOf = classObj
+ # $$->typeListMapEl = classObj->typeListMap.insert( typeList declaredClass );
+ }
+
+ # Push the found/new class.
+ lookupNs.push( declaredClass )
+ declNs.push( declaredClass )
+ }
+
+ def class_body_end
+ []
+ {
+ # Pop the class ns.
+ lookupNs.pop()
+ declNs.pop()
+
+ # LOG print( 'closing off class\n' )
+ }
+
+ def class_head_name
+ [class_id]
+ | [templ_class_id]
+ | [namespace_id]
+ | [typedef_id]
+ | [enum_id]
+ | [unknown_id]
+ | [identifier]
+ | [template_id]
+
+ def class_key
+ [`class]
+ | [`struct]
+ | [`union]
+
+ def class_member_rep
+ [class_member_rep class_member]
+ | []
+
+ def class_member
+ [member_declaration]
+ | [access_specifier `:]
+
+ def member_declaration
+ [declaration_start member_declaration_forms declaration_end `;]
+ | [class_function_definition]
+ | [using_declaration]
+ | [template_declaration]
+
+ def class_function_definition
+ [function_def_declaration ctor_initializer_opt class_function_body function_def_end]
+
+ lex
+ token cfb_open /'{'/
+ token cfb_close /'}'/
+ token cfb_string /
+ "'" ( [^'\\\n] | '\\' any )* "'" |
+ '"' ( [^"\\\n] | '\\' any )* '"'/
+ token cfb_comment /
+ ( '/*' (any | '\n')* :>> '*/' ) |
+ ( '//' any* :> '\n' )/
+ token cfb_data /[^{}'"/]+ | '/'/
+ end
+
+ def cfb_item
+ [cfb_data]
+ | [cfb_string]
+ | [cfb_comment]
+ | [cfb_open cfb_item* cfb_close]
+
+ def cfb_conts
+ [cfb_item* cfb_close]
+
+
+
+ def class_function_body
+ # ['{' cfb_conts]
+ #| [function_body]
+ [function_body]
+
+ # Get better performance if the form with decl_specifier_sing comes first.
+ def member_declaration_forms
+ [decl_specifier_mult_seq_opt decl_specifier_sing
+ decl_specifier_mult_seq_opt member_declarator_list_opt]
+ | [decl_specifier_mult_seq_opt member_declarator_list_opt]
+
+ def member_declarator_list_opt
+ [member_declarator_list]
+ | []
+
+ def member_declarator_list
+ [member_declarator_list `, member_declarator]
+ | [member_declarator]
+
+ def member_declarator
+ [declarator]
+ | [declarator `= constant_expression]
+ | [declarator `: constant_expression]
+ | [`: constant_expression]
+
+ def access_specifier
+ [`private]
+ | [`protected]
+ | [`public]
+
+ def access_specifier_opt
+ [access_specifier]
+ | []
+
+ def using_declaration
+ [`using id_expression `;]
+ {
+ obj: ptr<lang_object> = r2.lookupId.obj
+ if obj
+ insertObject( declNs.top obj->name obj )
+ }
+
+ | [`using type_id `;]
+ {
+ obj: ptr<lang_object> = r2.lookupId.obj
+ if obj
+ insertObject( declNs.top obj->name obj )
+ }
+
+ def using_directive
+ [`using `namespace root_qual_opt nested_name_specifier_opt
+ namespace_id `;]
+ {
+ # This uses a simple, incomplete guard against cycles in the graph of
+ # using namespaces. A more sophisticated and complete guard would look
+ # for longer cycles as well. Note that even gcc 3.3.5 does not bother.
+ match r5 [Id: lookup_id]
+ usingObject: ptr<lang_object> = Id.obj
+ inObject: ptr<lang_object> = declNs.top
+ if usingObject != inObject
+ inObject->inherited.append( usingObject )
+ }
+
+
+ #
+ # Derived classes
+ #
+
+ def base_clause_opt
+ [base_clause]
+ | []
+
+ def base_clause
+ [`: base_specifier_list]
+
+ def base_specifier_list
+ [base_specifier_list `, base_specifier]
+ | [base_specifier]
+
+ int addBaseSpecifier( inObject: ptr<lang_object> inheritedObject: ptr<lang_object> )
+ {
+ # Resolve typedefs.
+ if inheritedObject->typeId == TypedefType
+ inheritedObject = inheritedObject->typedefOf
+
+ inObject->inherited.append( inheritedObject )
+ }
+
+ def base_specifier
+ [root_qual_opt nested_name_specifier_opt type_name]
+ {
+ addBaseSpecifier( declNs.top r3.lookupId.obj )
+ }
+
+ | [`virtual access_specifier_opt root_qual_opt nested_name_specifier_opt type_name]
+ {
+ addBaseSpecifier( declNs.top r5.lookupId.obj )
+ }
+
+ | [access_specifier virtual_opt root_qual_opt nested_name_specifier_opt type_name]
+ {
+ addBaseSpecifier( declNs.top r5.lookupId.obj )
+ }
+
+ def virtual_opt
+ [`virtual]
+ | []
+
+ #
+ # Special member functions
+ #
+
+ def conversion_function_id
+ [`operator conversion_type_id]
+
+ def conversion_type_id
+ [necs_type_specifier_seq ptr_operator_seq_opt]
+
+ def ctor_initializer_opt
+ [ctor_initializer]
+ | []
+
+ def ctor_initializer
+ [`: mem_initializer_list]
+
+ def mem_initializer_list
+ [mem_initializer_list `, mem_initializer]
+ | [mem_initializer]
+
+ def mem_initializer
+ [mem_initializer_id `( expression_opt `)]
+
+ def mem_initializer_id
+ [root_qual_opt nested_name_specifier_opt unknown_id]
+ | [root_qual_opt nested_name_specifier_opt identifier]
+ | [root_qual_opt nested_name_specifier_opt type_name]
+ | [root_qual_opt nested_name_specifier_opt template_name]
+
+
+ #
+ # Overloading
+ #
+ def operator_function_id
+ [`operator operator]
+
+ def operator
+ [`+] | [`-] | [`*] | [`/] | [`=] | [`<] | [`>] | [`&] | [`|] |
+ [`^] | [`%] | [`~] | [`!] | [`( `)] | [`[ `]] | [`new] |
+ [`delete] | [`->] | [`++] | [`--] | [`*=] | [`/=] | [`%=] |
+ [`+=] | [`-=] | [`>>=] | [`<<=] | [`&=] | [`^=] | [`|=] | [`==] |
+ [`!=] | [`&&] | [`||] | [lt_eq] | [gt_eq] | [shift_left] | [shift_right]
+
+ def lt_eq
+ [`< `=]
+ # try {
+ # if ( $2->leader != 0 ) {
+ # #ifdef LOG_REDUCE
+ # cerr << "rejecting less-than equals-to" << endl;
+ # #endif
+ # reject();
+ # }
+ # };
+
+ def gt_eq
+ [`> `=]
+ # try {
+ # if ( $2->leader != 0 ) {
+ # #ifdef LOG_REDUCE
+ # cerr << "rejecting greater-than equals-to" << endl;
+ # #endif
+ # reject();
+ # }
+ # };
+
+ def shift_left
+ [`< `<]
+ # try {
+ # if ( $2->leader != 0 ) {
+ # #ifdef LOG_REDUCE
+ # cerr << "rejecting shift left" << endl;
+ # #endif
+ # reject();
+ # }
+ # };
+
+ def shift_right
+ [`> `>]
+ # try {
+ # if ( $2->leader != 0 ) {
+ # #ifdef LOG_REDUCE
+ # cerr << "rejecting shift right" << endl;
+ # #endif
+ # reject();
+ # }
+ # };
+
+ #
+ # Templates
+ #
+
+ def template_declaration
+ [template_declaration_params declaration]
+ {
+ templDecl.pop()
+ templateParamNs.pop()
+ }
+
+ def template_declaration_params
+ [`template `< tpl_start template_parameter_list `>]
+ {
+ templDecl.push( 1 )
+ }
+
+ | [`export `template `< tpl_start template_parameter_list `>]
+ {
+ templDecl.push( 1 )
+ }
+
+ def tpl_start
+ []
+ {
+ # Create a new scope for the template parameters.
+ newTemplateParamScope: ptr<lang_object> =
+ createLangObject( 0 '<tpl_start>' lookupNs.top )
+ templateParamNs.push( newTemplateParamScope )
+ }
+
+ def template_parameter_list
+ [template_parameter_list `, template_parameter]
+ | [template_parameter]
+
+ def template_parameter
+ [type_parameter]
+ | [template_parameter_declaration]
+
+ def template_parameter_declaration
+ [declaration_start template_parameter_declaration_forms declaration_end]
+
+ def template_parameter_declaration_forms
+ [decl_specifier_mult_seq param_maybe_declarator maybe_parameter_init]
+
+ | [temp_param_decl_specifier_sing decl_specifier_mult_seq_opt
+ param_maybe_declarator maybe_parameter_init]
+
+ | [decl_specifier_mult_seq temp_param_decl_specifier_sing
+ decl_specifier_mult_seq_opt param_maybe_declarator maybe_parameter_init]
+
+ def temp_param_decl_specifier_sing
+ [temp_param_type_specifier_sing]
+
+ # Template parameters cannot support elaborated type specifer or class specifier.
+ def temp_param_type_specifier_sing
+ [templ_simple_type_specifier]
+ | [enum_specifier]
+
+ def templ_simple_type_specifier
+ [simple_type_specifier_name]
+ | [simple_type_specifier_kw_seq]
+
+ def type_parameter
+ [`class type_param_id type_param_init_opt]
+ {
+ Id: lookup_id = lookup_id in r2
+ if Id {
+ # The lookup ns should be a template param scope.
+ newClass: ptr<lang_object> =
+ createLangObject( ClassType Id.data lookupNs.top )
+ insertObject( templateParamNs.top Id.data newClass )
+ }
+ }
+
+ | [`typename type_param_id type_param_init_opt]
+ {
+ Id: lookup_id = lookup_id in r2
+ if Id {
+ # The lookup ns should be a template param scope.
+ newClass: ptr<lang_object> =
+ createLangObject( ClassType Id.data lookupNs.top )
+ insertObject( templateParamNs.top Id.data newClass )
+ }
+ }
+
+ | [`template `< tpl_start template_parameter_list `>
+ `class type_param_id templ_type_param_init_opt]
+ {
+ Id: lookup_id = lookup_id in r7
+ if Id {
+ newClass: ptr<lang_object> =
+ createLangObject( TemplateClassType Id.data lookupNs.top )
+ insertObject( templateParamNs.top Id.data newClass )
+ }
+ }
+
+ def templ_type_param_init_opt
+ [`= id_expression]
+ | []
+
+ def type_param_init_opt
+ [`= type_id]
+ | []
+
+ def type_param_id
+ [namespace_id]
+ | [typedef_id]
+ | [enum_id]
+ | [class_id]
+ | [templ_class_id]
+ | [identifier]
+ | [template_id]
+ | [unknown_id]
+ | []
+
+ def template_argument_list_opt
+ [template_argument_list]
+ | []
+
+ def template_argument_list
+ [template_argument_list `, template_argument]
+ | [template_argument]
+
+ def template_argument
+ [type_id]
+ | [assignment_expression]
+
+ def explicit_instantiation
+ [`template declaration]
+ | [declaration_start decl_specifier_mult_seq `template declaration declaration_end]
+
+ def explicit_specialization
+ [`template `< `> declaration]
+
+ ## Not sure what this one is about?
+ #explicit_specialization:
+ # declaration_start decl_specifier_mult_seq KW_Template '<' '>'
+ # declaration declaration_end;
+
+
+ #
+ # Original namespace definition
+ #
+
+ def original_namespace_definition
+ [orig_namespace_def_name `{ declaration* namespace_end `}]
+
+ def orig_namespace_def_name [`namespace unknown_id]
+ {
+ match r2 [Id: lookup_id]
+ nspace: ptr<lang_object> = createLangObject(
+ NamespaceType Id.data lookupNs.top )
+
+ # Insert the new object into the dictionary of the parent.
+ insertObject( curNamespace.top Id.data nspace )
+
+ # Push the namespace
+ curNamespace.push( nspace )
+ declNs.push( nspace )
+ lookupNs.push( nspace )
+
+ # LOG print( 'created original namespace: ' Id.data '\n' )
+ }
+
+ def namespace_end []
+ {
+ # Pop the namespace.
+ curNamespace.pop()
+ declNs.pop()
+ lookupNs.pop()
+
+ # LOG print( 'closed namespace\n' )
+ }
+
+ #
+ # Extension namespace definition
+ #
+
+ def extension_namespace_definition
+ [ext_namespace_def_name `{ declaration* namespace_end `}]
+
+ def ext_namespace_def_name [`namespace namespace_id]
+ {
+ match r2 [Id: lookup_id]
+ nspace: ptr<lang_object> = Id.obj
+
+ # Push the namespace
+ curNamespace.push( nspace )
+ declNs.push( nspace )
+ lookupNs.push( nspace )
+
+ # LOG print( 'found extended namespace: ' Id.data '\n' )
+ }
+
+ #
+ # Unnamed namespace definition
+ #
+ def unnamed_namespace_definition
+ [unnamed_namespace_def_name `{ declaration* namespace_end `}]
+
+ def unnamed_namespace_def_name [`namespace]
+ {
+ nspace: ptr<lang_object> = createLangObject(
+ NamespaceType '<unnamed_namespace>'
+ lookupNs.top )
+
+ # Push the namespace
+ curNamespace.push( nspace )
+ declNs.push( nspace )
+ lookupNs.push( nspace )
+
+ # LOG print( 'parsed unnamed namespace\n' )
+ }
+
+ #
+ # linkage_specification
+ #
+ def linkage_specification
+ [`extern TK_DoubleLit `{ declaration* `}]
+ | [`extern TK_DoubleLit declaration]
+
+ #
+ # Exception Handling.
+ #
+
+ def try_block
+ [`try compound_statement handler_seq]
+
+ def handler_seq
+ [handler_seq handler]
+ | [handler]
+
+ def handler
+ [`catch `( exception_declaration `) compound_statement]
+
+ def exception_declaration
+ [type_specifier_seq declarator]
+ | [type_specifier_seq abstract_declarator]
+ | [type_specifier_seq]
+ | [`...]
+
+ def throw_expression
+ [`throw assignment_expression]
+ | [`throw]
+
+ def exception_specification_opt
+ [exception_specification]
+ | []
+
+ def exception_specification
+ [`throw `( type_id_list_opt `)]
+
+ def type_id_list_opt
+ [type_id_list]
+ | []
+
+ def type_id_list
+ [type_id_list `, type_id]
+ | [type_id]
+
+ def start
+ [declaration*]
+
+ #
+ # Grammar done.
+ #
+
+ int printObject( indent: str obj: ptr<lang_object> )
+ {
+ print( indent obj->name )
+
+ if obj->objectMap.length > 0
+ print( ' {\n' )
+
+ ChildNames: map<str list<ptr<lang_object>>> = obj->objectMap
+ for MapEl: list<ptr<lang_object>> in child( ChildNames ) {
+ for Obj: ptr<lang_object> in MapEl
+ printObject( indent + ' ' Obj )
+ }
+
+ if obj->objectMap.length > 0
+ print( indent '}' )
+
+ print( '\n' )
+ }
+
+end # lookup
+
+#
+# Global data declarations
+#
+
+cons Lookup: lookup[]
+
+# Constants for language object types.
+Lookup.NamespaceType = typeid<lookup::namespace_id>
+Lookup.ClassType = typeid<lookup::class_id>
+Lookup.TemplateClassType = typeid<lookup::templ_class_id>
+Lookup.EnumType = typeid<lookup::enum_id>
+Lookup.IdType = typeid<lookup::identifier>
+Lookup.TypedefType = typeid<lookup::typedef_id>
+Lookup.TemplateIdType = typeid<lookup::template_id>
+
+
+# Object stacks.
+Lookup.curNamespace = construct list<ptr<lookup::lang_object>> []
+Lookup.declNs = construct list<ptr<lookup::lang_object>> []
+Lookup.lookupNs = construct list<ptr<lookup::lang_object>> []
+Lookup.qualNs = construct list<ptr<lookup::lang_object>> []
+Lookup.templateParamNs = construct list<ptr<lookup::lang_object>> []
+
+# Declaration, declarator data.
+Lookup.declarationData = construct list<lookup::declaration_data> []
+Lookup.declaratorData = construct list<lookup::declarator_data> []
+
+# Template declarations
+Lookup.templDecl = construct list<int> []
+
+# Root namespace object
+Lookup.rootNamespace = createLangObject( Lookup.NamespaceType '<root_namespace>' nil )
+
+# Initialize the namespace and declaration stacks with the root namespace
+Lookup.curNamespace.push( Lookup.rootNamespace )
+Lookup.declNs.push( Lookup.rootNamespace )
+Lookup.lookupNs.push( Lookup.rootNamespace )
+
+# Start with no qualification (note variables are initialized to zero)
+Lookup.qualNs.push( nil )
+
+Lookup.templDecl.push( 0 )
+Lookup.declarationData.push( construct lookup::declaration_data( 0 0 0 ) [] )
+
+parse S: lookup::start( Lookup )[ stdin ]
+if ! S {
+ print( error )
+ exit( 1 )
+}
+
+print( '***** NAMSPACES *****\n' )
+printObject( '' Lookup.rootNamespace )
+print( '***** UNKNOWN DECLARATORS *****\n' )
+for DI: lookup::declarator_id in S {
+ if match DI
+ [lookup::root_qual_opt lookup::nested_name_specifier_opt lookup::`~ UID: lookup::unknown_id]
+ {
+ print( UID '\n' )
+ }
+}
+##### IN #####
+namespace ns1
+{
+ namespace sub1 { class A {}; }
+ namespace sub2 { class B {}; }
+}
+
+namespace ns2
+{
+ int i = b;
+ class C
+ {
+ };
+
+ using namespace ns1;
+}
+
+ns2::sub1::A a;
+
+struct A
+{
+ struct B {};
+};
+
+struct C
+{
+ struct D : virtual public A {};
+};
+
+C::D::A d;
+
+C c;
+
+struct C
+{
+
+};
+
+enum E
+{
+ C,
+ b
+};
+
+E e;
+
+enum E
+{
+ C,
+ b
+};
+
+
+int i;
+class C
+{
+ int j;
+};
+
+class D
+{
+ int ~D();
+};
+
+int C::k;
+int C::~C;
+
+typedef int Int;
+
+class C {};
+void ~C( );
+void C::operator +( int i );
+
+int i;
+
+//void operator C( void k );
+
+class C
+{
+
+};
+
+int C::f( int i, int j( void v ) );
+class C
+{
+ class D {};
+
+ typedef C I;
+
+ I::D i;
+};
+
+C c;
+
+void function( int i, int j )
+{
+ function();
+}
+
+
+
+class B { class Find {}; };
+
+typedef B T;
+
+class C : public T
+{
+ Find find;
+};
+
+
+template <class X> struct Y
+{
+ X t;
+ void f();
+};
+
+template <class X> void Y<X>::f();
+template <class X> struct Y
+{
+ class Z {};
+};
+
+class Y<int>
+{
+ int i;
+};
+
+//void f( class C<int> i, int j );
+
+int f( int (*) [](), void );
+void f();
+class C
+{
+ class D {};
+ void g();
+};
+
+//typename C c;
+
+class C
+{
+ class D {};
+ int f();
+};
+
+int f()
+{
+}
+
+int C::f()
+{
+ D d;
+}
+##### EXP #####
+***** NAMSPACES *****
+<root_namespace> {
+ A {
+ B
+ }
+ B {
+ Find
+ }
+ C {
+ D
+ I
+ f
+ g
+ i
+ j
+ find
+ }
+ C
+ C
+ C
+ D
+ E
+ E
+ T
+ Y {
+ Z
+ f
+ t
+ }
+ a
+ b
+ b
+ c
+ c
+ d
+ e
+ f
+ f
+ f
+ i
+ i
+ Int
+ ns1 {
+ sub1 {
+ A
+ }
+ sub2 {
+ B
+ }
+ }
+ ns2 {
+ C
+ i
+ }
+ function
+}
+***** UNKNOWN DECLARATORS *****
+C
diff --git a/test/mailbox.lm b/test/mailbox.lm
new file mode 100644
index 0000000..ca3b9a9
--- /dev/null
+++ b/test/mailbox.lm
@@ -0,0 +1,106 @@
+##### LM #####
+
+# lines, and fromlines
+lex
+ rl day /[A-Z][a-z][a-z]/
+ rl month /[A-Z][a-z][a-z]/
+ rl year /[0-9][0-9][0-9][0-9]/
+ rl time /[0-9][0-9] ':' [0-9][0-9] ( ':' [0-9][0-9] )? /
+ rl letterZone /[A-Z][A-Z][A-Z]/
+ rl numZone /[+\-][0-9][0-9][0-9][0-9]/
+ rl zone / letterZone | numZone/
+ rl dayNum /[0-9 ][0-9]/
+
+ # These are the different formats of the date minus an obscure
+ # type that has a funny string 'remote from xxx' on the end. Taken
+ # from c-client in the imap-2000 distribution.
+ rl date / day ' ' month ' ' dayNum ' ' time ' '
+ ( year | year ' ' zone | zone ' ' year ) /
+
+ # From lines separate messages. We will exclude from_line from a message
+ # body line. This will cause us to stay in message line up until an
+ # entirely correct from line is matched.
+ token from_line / 'From ' (any-'\n')* ' ' date '\n' /
+ token simple_line / [^\n]* '\n' /
+end
+
+rl hchar /print - [ :]/
+token header_name /hchar+/
+
+token colon /':' ' '*/
+token header_content / ([^\n] | '\n' [ \t])* '\n'/
+token blank_line / '\n' /
+
+def header
+ [header_name colon header_content]
+
+def message
+ [from_line header* blank_line simple_line*]
+
+def start
+ [message*]
+
+parse S: start[ stdin ]
+print_xml( S )
+print( '\n' )
+##### IN #####
+From thurston Tue Jan 2 21:16:50 2007
+Return-Path: <unknown>
+X-Spam-Level: *
+Received: from [109.111.71.111] (helo=twfmtr)
+ by zifreax with smtp (Exim 4.43)
+ id 1H1vfs-0005LN-HW; Tue, 2 Jan 2007 21:16:16 -0500
+Message-ID: <459B113F.8050903@immoarthabitatge.com>
+X-Keywords:
+X-UID: 1
+
+Content-Type: text/html; charset=ISO-8859-1
+</body>
+</html>
+
+From thurston Wed Jan 3 02:35:48 2007
+Return-Path: <unknown>
+X-Spam-Checker-Version: SpamAssassin 3.1.1 (2006-03-10) on mambo.cs.queensu.ca
+X-Spam-Level: **
+X-Spam-Status: No, score=2.9 required=5.0 tests=BAYES_20,EXTRA_MPART_TYPE,
+ HTML_40_50,HTML_IMAGE_ONLY_16,HTML_MESSAGE,RCVD_IN_BL_SPAMCOP_NET
+ autolearn=no version=3.1.1
+X-Bogosity: Unsure, tests=bogofilter, spamicity=0.971708, version=1.0.2
+Status: RO
+X-UID: 2
+
+------=_NextPart_000_0010_01C72F11.F137BD60
+ charset="windows-1252"
+Content-Transfer-Encoding: quoted-printable
+
+##### EXP #####
+<start><_repeat_message><message><from_line>From thurston Tue Jan 2 21:16:50 2007
+</from_line><_repeat_header><header><header_name>Return-Path</header_name><colon>: </colon><header_content>&lt;unknown&gt;
+</header_content></header><header><header_name>X-Spam-Level</header_name><colon>: </colon><header_content>*
+</header_content></header><header><header_name>Received</header_name><colon>: </colon><header_content>from [109.111.71.111] (helo=twfmtr)
+ by zifreax with smtp (Exim 4.43)
+ id 1H1vfs-0005LN-HW; Tue, 2 Jan 2007 21:16:16 -0500
+</header_content></header><header><header_name>Message-ID</header_name><colon>: </colon><header_content>&lt;459B113F.8050903@immoarthabitatge.com&gt;
+</header_content></header><header><header_name>X-Keywords</header_name><colon>: </colon><header_content>
+</header_content></header><header><header_name>X-UID</header_name><colon>: </colon><header_content>1
+</header_content></header></_repeat_header><blank_line>
+</blank_line><_repeat_simple_line><simple_line>Content-Type: text/html; charset=ISO-8859-1
+</simple_line><simple_line>&lt;/body&gt;
+</simple_line><simple_line>&lt;/html&gt;
+</simple_line><simple_line>
+</simple_line></_repeat_simple_line></message><message><from_line>From thurston Wed Jan 3 02:35:48 2007
+</from_line><_repeat_header><header><header_name>Return-Path</header_name><colon>: </colon><header_content>&lt;unknown&gt;
+</header_content></header><header><header_name>X-Spam-Checker-Version</header_name><colon>: </colon><header_content>SpamAssassin 3.1.1 (2006-03-10) on mambo.cs.queensu.ca
+</header_content></header><header><header_name>X-Spam-Level</header_name><colon>: </colon><header_content>**
+</header_content></header><header><header_name>X-Spam-Status</header_name><colon>: </colon><header_content>No, score=2.9 required=5.0 tests=BAYES_20,EXTRA_MPART_TYPE,
+ HTML_40_50,HTML_IMAGE_ONLY_16,HTML_MESSAGE,RCVD_IN_BL_SPAMCOP_NET
+ autolearn=no version=3.1.1
+</header_content></header><header><header_name>X-Bogosity</header_name><colon>: </colon><header_content>Unsure, tests=bogofilter, spamicity=0.971708, version=1.0.2
+</header_content></header><header><header_name>Status</header_name><colon>: </colon><header_content>RO
+</header_content></header><header><header_name>X-UID</header_name><colon>: </colon><header_content>2
+</header_content></header></_repeat_header><blank_line>
+</blank_line><_repeat_simple_line><simple_line>------=_NextPart_000_0010_01C72F11.F137BD60
+</simple_line><simple_line> charset="windows-1252"
+</simple_line><simple_line>Content-Transfer-Encoding: quoted-printable
+</simple_line><simple_line>
+</simple_line></_repeat_simple_line></message></_repeat_message></start>
diff --git a/test/matchex.lm b/test/matchex.lm
new file mode 100644
index 0000000..9dd24c4
--- /dev/null
+++ b/test/matchex.lm
@@ -0,0 +1,41 @@
+##### LM #####
+lex
+ token id /[a-zA-Z_][a-zA-Z0-9_]*/
+ literal `= `< `> `/
+ ignore /[ \t\n\r\v]+/
+end
+
+def attr
+ [id `= id]
+
+def open_tag
+ [`< id attr* `>]
+
+def close_tag
+ [`< `/ id `>]
+
+def tag
+ [open_tag item* close_tag]
+
+def item
+ [tag]
+| [id]
+
+parse Tag: tag[ stdin ]
+
+# Style: List of literal text and types.
+match Tag ["<person name=" Val1:id attr*">" item* "</person>"]
+
+# Style: Literal text with embedded lists of types.
+match Tag "<person name=[Val2:id attr*]>[item*]</person>"
+
+print( ^Val1 '\n' )
+print( ^Val2 '\n' )
+
+##### IN #####
+<person name=adrian hometown=kingston>
+ <t1 foo=bar2 e=f></t2>
+</person>
+##### EXP #####
+adrian
+adrian
diff --git a/test/maxlen.lm b/test/maxlen.lm
new file mode 100644
index 0000000..2d220d1
--- /dev/null
+++ b/test/maxlen.lm
@@ -0,0 +1,57 @@
+##### LM #####
+
+context maxlen
+
+ #
+ # Regular Definitions
+ #
+ rl rl_ws /[ \t\n\r\v]+/
+ rl rl_id /[a-zA-Z_][a-zA-Z0-9_]*/
+
+ #
+ # Tokens
+ #
+
+ lex
+ ignore /rl_ws/
+ token id /rl_id/
+ end
+
+ num: int
+ allow: int
+
+ def item
+ [id]
+ {
+ num = num + 1
+ toomuch: int = allow+1
+ if num == toomuch {
+ reject
+ }
+ }
+
+ def open
+ []
+ {
+ num = 0
+ }
+
+ def close []
+
+ def restricted_list
+ [open item*]
+
+ def start
+ [restricted_list id*]
+end # maxlen
+
+cons MaxLen: maxlen[]
+MaxLen.allow = 3
+
+parse S: maxlen::start(MaxLen)[stdin]
+print_xml( S )
+print('\n')
+##### IN #####
+a b c d e f g
+##### EXP #####
+<maxlen::start><maxlen::restricted_list><maxlen::open></maxlen::open><maxlen::_repeat_item><maxlen::item><maxlen::id>a</maxlen::id></maxlen::item><maxlen::item><maxlen::id>b</maxlen::id></maxlen::item><maxlen::item><maxlen::id>c</maxlen::id></maxlen::item></maxlen::_repeat_item></maxlen::restricted_list><maxlen::_repeat_id><maxlen::id>d</maxlen::id><maxlen::id>e</maxlen::id><maxlen::id>f</maxlen::id><maxlen::id>g</maxlen::id></maxlen::_repeat_id></maxlen::start>
diff --git a/test/multiregion1.lm b/test/multiregion1.lm
new file mode 100644
index 0000000..5c8bdea
--- /dev/null
+++ b/test/multiregion1.lm
@@ -0,0 +1,242 @@
+##### LM #####
+
+
+token newline / '\n' /
+token index / 'Index:' [ \t]* /
+token consume_line / [^\n]* /
+
+
+def index_stmt [index consume_line newline]
+
+token separator_line / '='+ '\n' /
+
+# Whitespace separated word list
+lex
+ token word /[^\t \n]+/
+ ignore /[\t ]+/
+
+ def word_list
+ [word word_list]
+ | []
+end
+
+token old_file_start / '---' [\t ]+ /
+token new_file_start / '+++' [\t ]+ /
+
+def old_file
+ [old_file_start word_list newline]
+
+def new_file
+ [new_file_start word_list newline]
+
+def file_header
+ [index_stmt separator_line old_file new_file]
+
+token hunk_header / '@@' any* :>> '@@' '\n' /
+token hunk_line / ( ' ' | '-' | '+' ) [^\n]* '\n' /
+
+def hunk_body
+ [hunk_line*]
+
+def hunk
+ [hunk_header hunk_body]
+
+# diff of a single file: header followed by a hunk list.
+def file_diff
+ [file_header hunk*]
+
+def start
+ [file_diff*]
+
+parse S: start[ stdin ]
+
+for OF: old_file in S {
+ # Get the first word and check if it is
+ # the file we are interested in.
+ if match OF [
+ "--- fsmrun.cpp"
+ Rest: word_list
+ "\n"
+ ]
+ {
+ OF = construct old_file
+ ["--- newfilename.cpp " Rest "\n"]
+ }
+}
+
+print( S )
+
+##### IN #####
+Index: fsmrun.cpp
+===================================================================
+--- fsmrun.cpp (revision 4555)
++++ fsmrun.cpp (working copy)
+@@ -150,7 +150,7 @@
+ peof = 0;
+ if ( parser != 0 ) {
+ region = parser->getNextRegion();
+- cs = getStateFromNextRegion();
++ cs = tables->entryByRegion[region];
+ }
+ else {
+ region = 0;
+@@ -189,7 +189,7 @@
+
+ tokstart = 0;
+ region = parser->getNextRegion();
+- cs = getStateFromNextRegion();
++ cs = tables->entryByRegion[region];
+ }
+
+ void FsmRun::sendToken( int id )
+@@ -222,7 +222,7 @@
+ parser = newParser;
+
+ region = parser->getNextRegion();
+- cs = getStateFromNextRegion();
++ cs = tables->entryByRegion[region];
+ }
+ else {
+ #ifdef LOG_ACTIONS
+@@ -355,7 +355,7 @@
+
+ /* Set the current state from the next region. */
+ region = parser->getNextRegion();
+- cs = getStateFromNextRegion();
++ cs = tables->entryByRegion[region];
+ }
+ }
+
+@@ -452,7 +452,7 @@
+ /* First thing check for error. */
+ if ( cs == tables->errorState ) {
+ if ( parser != 0 ) {
+- if ( getStateFromNextRegion( 1 ) != 0 ) {
++ if ( parser->getNextRegion( 1 ) != 0 ) {
+ #ifdef LOG_BACKTRACK
+ cerr << "scanner failed, trying next region" << endl;
+ #endif
+@@ -462,7 +462,7 @@
+
+ parser->nextRegionInd += 1;
+ region = parser->getNextRegion();
+- cs = getStateFromNextRegion();
++ cs = tables->entryByRegion[region];
+ cerr << "new token region: " <<
+ parser->tables->gbl->regionInfo[region].name << endl;
+ continue;
+@@ -495,7 +495,7 @@
+ }
+ else {
+ region = parser->getNextRegion();
+- cs = getStateFromNextRegion();
++ cs = tables->entryByRegion[region];
+ cerr << "new token region: " <<
+ parser->tables->gbl->regionInfo[region].name << endl;
+ continue;
+Index: junk.cpp
+===================================================================
+---
++++ junk.cpp (working copy)
+Index: fsmrun.h
+===================================================================
+--- fsmrun.h (revision 4557)
++++ fsmrun.h (working copy)
+@@ -197,10 +197,6 @@
+ void runOnInputStream( PdaRun *parser, InputStream &in );
+ void execute();
+
+- /* Offset can be used to look at the next nextRegionInd. */
+- int getStateFromNextRegion( int offset = 0 )
+- { return tables->entryByRegion[parser->getNextRegion(offset)]; }
+-
+ FsmTables *tables;
+ PdaRun *parser;
+ InputStream *inputStream;
+##### EXP #####
+Index: fsmrun.cpp
+===================================================================
+--- newfilename.cpp (revision 4555)
++++ fsmrun.cpp (working copy)
+@@ -150,7 +150,7 @@
+ peof = 0;
+ if ( parser != 0 ) {
+ region = parser->getNextRegion();
+- cs = getStateFromNextRegion();
++ cs = tables->entryByRegion[region];
+ }
+ else {
+ region = 0;
+@@ -189,7 +189,7 @@
+
+ tokstart = 0;
+ region = parser->getNextRegion();
+- cs = getStateFromNextRegion();
++ cs = tables->entryByRegion[region];
+ }
+
+ void FsmRun::sendToken( int id )
+@@ -222,7 +222,7 @@
+ parser = newParser;
+
+ region = parser->getNextRegion();
+- cs = getStateFromNextRegion();
++ cs = tables->entryByRegion[region];
+ }
+ else {
+ #ifdef LOG_ACTIONS
+@@ -355,7 +355,7 @@
+
+ /* Set the current state from the next region. */
+ region = parser->getNextRegion();
+- cs = getStateFromNextRegion();
++ cs = tables->entryByRegion[region];
+ }
+ }
+
+@@ -452,7 +452,7 @@
+ /* First thing check for error. */
+ if ( cs == tables->errorState ) {
+ if ( parser != 0 ) {
+- if ( getStateFromNextRegion( 1 ) != 0 ) {
++ if ( parser->getNextRegion( 1 ) != 0 ) {
+ #ifdef LOG_BACKTRACK
+ cerr << "scanner failed, trying next region" << endl;
+ #endif
+@@ -462,7 +462,7 @@
+
+ parser->nextRegionInd += 1;
+ region = parser->getNextRegion();
+- cs = getStateFromNextRegion();
++ cs = tables->entryByRegion[region];
+ cerr << "new token region: " <<
+ parser->tables->gbl->regionInfo[region].name << endl;
+ continue;
+@@ -495,7 +495,7 @@
+ }
+ else {
+ region = parser->getNextRegion();
+- cs = getStateFromNextRegion();
++ cs = tables->entryByRegion[region];
+ cerr << "new token region: " <<
+ parser->tables->gbl->regionInfo[region].name << endl;
+ continue;
+Index: junk.cpp
+===================================================================
+---
++++ junk.cpp (working copy)
+Index: fsmrun.h
+===================================================================
+--- fsmrun.h (revision 4557)
++++ fsmrun.h (working copy)
+@@ -197,10 +197,6 @@
+ void runOnInputStream( PdaRun *parser, InputStream &in );
+ void execute();
+
+- /* Offset can be used to look at the next nextRegionInd. */
+- int getStateFromNextRegion( int offset = 0 )
+- { return tables->entryByRegion[parser->getNextRegion(offset)]; }
+-
+ FsmTables *tables;
+ PdaRun *parser;
+ InputStream *inputStream;
diff --git a/test/multiregion2.lm b/test/multiregion2.lm
new file mode 100644
index 0000000..d69b8d4
--- /dev/null
+++ b/test/multiregion2.lm
@@ -0,0 +1,124 @@
+##### LM #####
+#
+# Character classes
+#
+rl CTL /0..31 | 127/
+rl CR /13/
+rl LF /10/
+rl SP /32/
+rl HT /9/
+rl CHAR /0..127/
+
+rl separators / '(' | ')' | '<' | '>'
+ | '@' | ',' | ';' | ':' | '\\'
+ | '"' | '/' | '[' | ']' | '?'
+ | '=' | '{' | '}' | SP | HT /
+
+rl token_char /CHAR - CTL - separators/
+
+#
+# Literal tokens
+#
+
+literal `HTTP/ `:
+token SPT /' '/
+token CRLF /CR LF/
+
+#
+# Request Line
+#
+
+token method /token_char+/
+
+token request_uri /(^SP)+/
+
+token http_number /digit+ '.' digit+/
+
+def http_version
+ [ `HTTP/ http_number ]
+
+def request_line
+ [method SPT request_uri
+ SPT http_version CRLF]
+
+#
+# Header
+#
+
+token field_name /token_char+/
+
+lex
+ token fv_plain /(^(CR|LF))*/
+ token fv_ext /CR LF (SP|HT)/
+ token fv_term /CR LF/
+end
+
+def fv
+ [fv_plain]
+| [fv_ext]
+
+def field_value
+ [fv* fv_term]
+
+def header
+ [field_name `: field_value]
+
+#
+# Request
+#
+
+def request
+ [request_line header* CRLF]
+
+parse R: request*[ stdin ]
+
+if !R {
+ print( error )
+ exit( 1 )
+}
+
+for FV: fv in R {
+ if match FV [fv_ext]
+ FV = cons fv " "
+}
+
+print( R )
+
+##### IN #####
+GET /hi/there/ HTTP/1.1
+
+GET /hithere/ HTTP/1.1
+Host: localhost:3535
+User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.12) Gecko/20080207 Ubuntu/7.10 (gutsy) Firefox/2.0.0.12
+Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
+Accept-Language: en-us,en;q=0.5
+Accept-Encoding: gzip,deflate
+Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7
+Keep-Alive: 300
+Connection: keep-alive
+Cache-Control: max-age=0
+
+GET foo HTTP/1.1
+hello: foo
+hi: there
+ my
+ friend
+
+##### EXP #####
+GET /hi/there/ HTTP/1.1
+
+GET /hithere/ HTTP/1.1
+Host: localhost:3535
+User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.12) Gecko/20080207 Ubuntu/7.10 (gutsy) Firefox/2.0.0.12
+Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
+Accept-Language: en-us,en;q=0.5
+Accept-Encoding: gzip,deflate
+Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7
+Keep-Alive: 300
+Connection: keep-alive
+Cache-Control: max-age=0
+
+GET foo HTTP/1.1
+hello: foo
+hi: there my friend
+
diff --git a/test/mutualrec.lm b/test/mutualrec.lm
new file mode 100644
index 0000000..2eafd28
--- /dev/null
+++ b/test/mutualrec.lm
@@ -0,0 +1,18 @@
+##### LM #####
+
+int f1( i: int )
+{
+ return f2( i + 1 ) + 1
+}
+
+int f2( i: int )
+{
+ if i < 10
+ return f1( i + 1 ) + 1
+ else
+ return i
+}
+
+print( "f1() = [$f1(0)]\n" )
+##### EXP #####
+f1() = 22
diff --git a/test/namespace1.lm b/test/namespace1.lm
new file mode 100644
index 0000000..b31b453
--- /dev/null
+++ b/test/namespace1.lm
@@ -0,0 +1,24 @@
+##### LM #####
+namespace n1
+
+ lex
+ token id / 'a' .. 'z' /
+ ignore / '\n' | '\t' | ' ' /
+ end
+
+ def start
+ [id*]
+end
+
+parse P: n1::start[stdin]
+print( P )
+##### IN #####
+a
+ b
+ c
+d
+##### EXP #####
+a
+ b
+ c
+d
diff --git a/test/nestedcomm.lm b/test/nestedcomm.lm
new file mode 100644
index 0000000..1b1b6a4
--- /dev/null
+++ b/test/nestedcomm.lm
@@ -0,0 +1,55 @@
+##### LM #####
+#
+# Tokens
+#
+
+# Any single character can be a literal
+lex
+ # Ignore whitespace.
+ ignore /[ \t\n\r\v]+/
+
+ # Open and close id
+ token id /[a-zA-Z_][a-zA-Z0-9_]*/
+
+ token open_paren /'('/
+ {
+ parse_stop NC: nested_comment[ stdin ]
+ print( %NC '\n' )
+ input.push_ignore( NC )
+ }
+end
+
+#
+# Token translation
+#
+
+lex
+ literal `( `)
+ token nc_data /[^()]+/
+end
+
+def nc_item
+ [nc_data]
+| [nested_comment]
+
+def nested_comment
+ [`( nc_item* `)]
+
+def nested [id*]
+
+parse P: nested[ stdin ]
+
+print( ^P '\n' )
+print_xml( ^P )
+print( '\n' )
+print_xml_ac( ^P )
+print( '\n' )
+print( ^P '\n' )
+##### IN #####
+hello there ( (this is a nested comment /*sdf;asd_++_stuff) ) and this is not
+##### EXP #####
+( (this is a nested comment /*sdf;asd_++_stuff) )
+hello there ( (this is a nested comment /*sdf;asd_++_stuff) ) and this is not
+<nested><_repeat_id><id>hello</id><id>there</id><id>and</id><id>this</id><id>is</id><id>not</id></_repeat_id></nested>
+<nested><_repeat_id><id>hello</id><_ignore_0001> </_ignore_0001><id>there</id><_ignore_0001> </_ignore_0001><nested_comment><_literal_0007>(</_literal_0007><_repeat_nc_item><nc_item><nc_data> </nc_data></nc_item><nc_item><nested_comment><_literal_0007>(</_literal_0007><_repeat_nc_item><nc_item><nc_data>this is a nested comment /*sdf;asd_++_stuff</nc_data></nc_item></_repeat_nc_item><_literal_0009>)</_literal_0009></nested_comment></nc_item><nc_item><nc_data> </nc_data></nc_item></_repeat_nc_item><_literal_0009>)</_literal_0009></nested_comment><_ignore_0001> </_ignore_0001><id>and</id><_ignore_0001> </_ignore_0001><id>this</id><_ignore_0001> </_ignore_0001><id>is</id><_ignore_0001> </_ignore_0001><id>not</id></_repeat_id></nested>
+hello there ( (this is a nested comment /*sdf;asd_++_stuff) ) and this is not
diff --git a/test/order1.lm b/test/order1.lm
new file mode 100644
index 0000000..e510bbb
--- /dev/null
+++ b/test/order1.lm
@@ -0,0 +1,115 @@
+##### LM #####
+
+lex
+ token c_single_lit /( 'L'? "'" ( [^'\\\n] | '\\' any )* "'" )/
+ token c_double_lit /( 'L'? '"' ( [^"\\\n] | '\\' any )* '"' )/
+
+ token sym / ';' | ',' | '=' | '(' | ')' | ':' | '&' | '*' |
+ '[' | ']' | '~' | '+' | '-' | '/' | '<' | '>' | '|' |
+ '^' | '%' | '!' | '?' | '.' | '#'/
+
+ # Identifiers
+ token c_id /( [a-zA-Z_] [a-zA-Z0-9_]* )/
+
+ # Comments and whitespace.
+ token comm_c /( '/*' (any | '\n')* :>> '*/' )/
+ token comm_cxx /( '//' any* :> '\n' )/
+ token ws /( any - 33..126 )+/
+end
+
+def c_token
+ [c_single_lit]
+| [c_double_lit]
+| [sym]
+| [c_id]
+| [comm_c]
+| [comm_cxx]
+| [ws]
+
+def c_token_list
+ [c_token c_token_list]
+| [c_token]
+
+# Can parse this, useful for single constructs.
+def c
+ [c_token*]
+
+literal `%%
+
+lex
+ literal `{ `}
+ literal `protocol `client `server `port `by `tcp `udp
+ token id /[A-Za-z_][A-Za-z_0-9]*/
+ token number /[0-9]+/
+
+ ignore /'/*' any* :>> '*/'/
+ ignore /[ \t\r\n]+/
+end
+
+def tcp_by_port
+ [`tcp `by `port]
+
+def udp_by_port
+ [`udp `by `port]
+
+def attribute
+ [`client id]
+| [`server id]
+| [`port number]
+| [`udp id]
+| [tcp_by_port]
+| [udp_by_port]
+
+def tcp_protocol
+ [`tcp `protocol id `{ attribute* `}]
+
+def udp_protocol
+ [`udp `protocol id `{ attribute* `}]
+
+def protocol
+ [tcp_protocol]
+| [udp_protocol]
+
+def program
+ [c `%% protocol*]
+
+alias output accum<c>
+
+def port
+ Port: int
+ Protocol: str
+ []
+
+# Parse the input.
+parse P: program[ stdin ]
+
+Output: output Output = construct output []
+
+# Take off the leading C from the input file and send it out.
+match P [C: c `%% protocol*]
+
+send Output [$C]
+send Output
+ "#include <assert.h>
+ "
+ eos
+
+print( Output.tree )
+##### IN #####
+#include "some_header.h"
+
+%%
+
+tcp protocol FOO
+{
+ port 99
+
+ client c
+ server s
+}
+
+##### EXP #####
+#include "some_header.h"
+
+#include <assert.h>
+
diff --git a/test/order2.lm b/test/order2.lm
new file mode 100644
index 0000000..d91dd42
--- /dev/null
+++ b/test/order2.lm
@@ -0,0 +1,116 @@
+##### LM #####
+
+lex
+ token c_single_lit /( 'L'? "'" ( [^'\\\n] | '\\' any )* "'" )/
+ token c_double_lit /( 'L'? '"' ( [^"\\\n] | '\\' any )* '"' )/
+
+ token sym / ';' | ',' | '=' | '(' | ')' | ':' | '&' | '*' |
+ '[' | ']' | '~' | '+' | '-' | '/' | '<' | '>' | '|' |
+ '^' | '!' | '?' | '.' | '#'/
+
+ # Identifiers
+ token c_id /( [a-zA-Z_] [a-zA-Z0-9_]* )/
+
+ # Comments and whitespace.
+ token comm_c /( '/*' (any | '\n')* :>> '*/' )/
+ token comm_cxx /( '//' any* :> '\n' )/
+ token ws /( any - 33..126 )+/
+end
+
+def c_token
+ [c_single_lit]
+| [c_double_lit]
+| [sym]
+| [c_id]
+| [comm_c]
+| [comm_cxx]
+| [ws]
+
+def c_token_list
+ [c_token c_token_list]
+| [c_token]
+
+# Can parse this, useful for single constructs.
+#def c
+# [c_token*]
+
+def c
+ [c_token_list]
+
+lex
+ literal `%%
+ literal `{ `}
+ literal `protocol `client `server `port `by `tcp `udp
+ token id /[A-Za-z_][A-Za-z_0-9]*/
+ token number /[0-9]+/
+
+ ignore /'/*' any* :>> '*/'/
+ ignore /[ \t\r\n]+/
+end
+
+def tcp_by_port
+ [`tcp `by `port]
+
+def udp_by_port
+ [`udp `by `port]
+
+def attribute
+ [`client id]
+| [`server id]
+| [`port number]
+| [`udp id]
+| [tcp_by_port]
+| [udp_by_port]
+
+def tcp_protocol
+ [`tcp `protocol id `{ attribute* `}]
+
+def udp_protocol
+ [`udp `protocol id `{ attribute* `}]
+
+def protocol
+ [tcp_protocol]
+| [udp_protocol]
+
+def program
+ [c `%% protocol*]
+
+alias output parser<c>
+
+def port
+ Port: int
+ Protocol: str
+ []
+
+# Parse the input.
+parse P: program[ stdin ]
+
+Output: output Output = construct output []
+
+# Take off the leading C from the input file and send it out.
+match P [C: c '%%' protocol*]
+
+send Output [
+ $C
+ "#include <assert.h>
+ "
+ ] eos
+print( Output.tree )
+##### IN #####
+#include "some_header.h"
+
+%%
+
+tcp protocol FOO
+{
+ port 99
+
+ client c
+ server s
+}
+
+##### EXP #####
+#include "some_header.h"
+
+#include <assert.h>
+
diff --git a/test/parse1.lm b/test/parse1.lm
new file mode 100644
index 0000000..f8ecab8
--- /dev/null
+++ b/test/parse1.lm
@@ -0,0 +1,14 @@
+##### LM #####
+lex
+ token id / [a-z] /
+ ignore / [\n\t ] /
+end
+
+def start [id*]
+
+parse S: start[stdin]
+print( S )
+##### IN #####
+ab cd ef
+##### EXP #####
+ab cd ef
diff --git a/test/prints.lm b/test/prints.lm
new file mode 100644
index 0000000..ad08220
--- /dev/null
+++ b/test/prints.lm
@@ -0,0 +1,17 @@
+##### LM #####
+lex
+ token word /[a-z]+/
+ ignore /[\t\n ]+/
+end
+
+def start
+ [word*]
+
+parse Start: start[stdin]
+
+prints( stderr 'fd stderr: ' ^Start '\n' )
+prints( stdout 'fd stdout: ' ^Start '\n' )
+##### IN #####
+a b c
+##### EXP #####
+fd stdout: a b c
diff --git a/test/pull1.lm b/test/pull1.lm
new file mode 100644
index 0000000..f86bd6c
--- /dev/null
+++ b/test/pull1.lm
@@ -0,0 +1,7 @@
+##### LM #####
+String: str = stdin.pull( 10 )
+print( String '\n' )
+##### IN #####
+this is input for a non-parse pull
+##### EXP #####
+this is in
diff --git a/test/pull2.lm b/test/pull2.lm
new file mode 100644
index 0000000..7b50092
--- /dev/null
+++ b/test/pull2.lm
@@ -0,0 +1,8 @@
+##### LM #####
+Stream: stream = open( 'working/pull2.in' ('r') )
+String: str = Stream.pull( 10 )
+print( String '\n' )
+##### IN #####
+this is input for a non-parse pull
+##### EXP #####
+this is in
diff --git a/test/ragelambig1.lm b/test/ragelambig1.lm
new file mode 100644
index 0000000..845a07b
--- /dev/null
+++ b/test/ragelambig1.lm
@@ -0,0 +1,72 @@
+##### LM #####
+lex
+ ignore /[\t\n ]+/
+ literal `^ `| `- `, `: `! `? `.
+ literal `( `) `{ `} `* `& `+
+
+ literal `-- `:> `:>> `<: `-> `**
+
+ token word /[a-zA-Z_][a-zA-Z0-9_]*/
+ token uint /[0-9]+/
+end
+
+
+def start
+ [expression]
+ {
+ print_xml( lhs )
+ }
+
+def expression
+ [expression `| term]
+| [expression `& term]
+| [expression `- term]
+| [expression `-- term]
+| [term]
+
+def term
+ [term factor_with_rep]
+ {
+ if match lhs [term `- uint]
+ reject
+ }
+| [term `. factor_with_rep]
+| [term `:> factor_with_rep]
+| [term `:>> factor_with_rep]
+| [term `<: factor_with_rep]
+| [factor_with_rep]
+
+def factor_with_rep
+ [factor_with_rep `*]
+| [factor_with_rep `**]
+| [factor_with_rep `?]
+| [factor_with_rep `+]
+| [factor_with_rep `{ factor_rep_num `}]
+| [factor_with_rep `{ `, factor_rep_num `}]
+| [factor_with_rep `{ factor_rep_num `, `}]
+| [factor_with_rep `{ factor_rep_num `, factor_rep_num `}]
+| [factor_with_neg]
+
+def factor_rep_num [uint]
+
+def factor_with_neg
+ [`! factor_with_neg]
+| [`^ factor_with_neg]
+| [factor]
+
+def factor
+ [alphabet_num]
+| [word]
+| [`( expression `)]
+
+def alphabet_num
+ [uint]
+| [`- uint]
+
+parse start[ stdin ]
+
+print( '\n' )
+##### IN #####
+1 - 1
+##### EXP #####
+<start><expression><expression><term><factor_with_rep><factor_with_neg><factor><alphabet_num><uint>1</uint></alphabet_num></factor></factor_with_neg></factor_with_rep></term></expression><_literal_0007>-</_literal_0007><term><factor_with_rep><factor_with_neg><factor><alphabet_num><uint>1</uint></alphabet_num></factor></factor_with_neg></factor_with_rep></term></expression></start>
diff --git a/test/ragelambig2.lm b/test/ragelambig2.lm
new file mode 100644
index 0000000..39602bc
--- /dev/null
+++ b/test/ragelambig2.lm
@@ -0,0 +1,72 @@
+##### LM #####
+lex
+ ignore /[\t\n ]+/
+ literal `^ `| `- `, `: `! `? `.
+ literal `( `) `{ `} `* `& `+
+
+ literal `-- `:> `:>> `<: `-> `**
+
+ token word /[a-zA-Z_][a-zA-Z0-9_]*/
+ token uint /[0-9]+/
+end
+
+
+def start
+ [expression]
+ {
+ print_xml( lhs )
+ }
+
+def expression
+ [expression `| term]
+| [expression `& term]
+| [expression `- term]
+| [expression `-- term]
+| [term]
+
+def term
+ [factor_with_rep more_term]
+
+# Can resolve the ambiguity by making more_term shortest match.
+def more_term
+ []
+| [factor_with_rep more_term]
+| [`. factor_with_rep more_term]
+| [`:> factor_with_rep more_term]
+| [`:>> factor_with_rep more_term]
+| [`<: factor_with_rep more_term]
+
+def factor_with_rep
+ [factor_with_rep `*]
+| [factor_with_rep `**]
+| [factor_with_rep `?]
+| [factor_with_rep `+]
+| [factor_with_rep `{ factor_rep_num `}]
+| [factor_with_rep `{ `, factor_rep_num `}]
+| [factor_with_rep `{ factor_rep_num `, `}]
+| [factor_with_rep `{ factor_rep_num `, factor_rep_num `}]
+| [factor_with_neg]
+
+def factor_rep_num
+ [uint]
+
+def factor_with_neg
+ [`! factor_with_neg]
+| [`^ factor_with_neg]
+| [factor]
+
+def factor
+ [alphabet_num]
+| [word]
+| [`( expression `)]
+
+def alphabet_num
+ [uint]
+| [`- uint]
+
+parse start[ stdin ]
+print( '\n' )
+##### IN #####
+1 - 1
+##### EXP #####
+<start><expression><expression><term><factor_with_rep><factor_with_neg><factor><alphabet_num><uint>1</uint></alphabet_num></factor></factor_with_neg></factor_with_rep><more_term></more_term></term></expression><_literal_0007>-</_literal_0007><term><factor_with_rep><factor_with_neg><factor><alphabet_num><uint>1</uint></alphabet_num></factor></factor_with_neg></factor_with_rep><more_term></more_term></term></expression></start>
diff --git a/test/ragelambig3.lm b/test/ragelambig3.lm
new file mode 100644
index 0000000..74b7254
--- /dev/null
+++ b/test/ragelambig3.lm
@@ -0,0 +1,72 @@
+##### LM #####
+lex
+ ignore /[\t\n ]+/
+ literal `^ `| `- `, `: `! `? `.
+ literal `( `) `{ `} `* `& `+
+
+ literal `-- `:> `:>> `<: `-> `**
+
+ token word /[a-zA-Z_][a-zA-Z0-9_]*/
+ token uint /[0-9]+/
+end
+
+
+def start
+ [expression]
+ {
+ print_xml( lhs )
+ }
+
+def expression
+ [expression `| term_short]
+| [expression `& term_short]
+| [expression `- term_short]
+| [expression `-- term_short]
+| [term_short]
+
+# Works, but is confusing.
+def term_short
+ reducefirst
+ [term]
+
+def term
+ [term factor_with_rep]
+| [term `. factor_with_rep]
+| [term `:> factor_with_rep]
+| [term `:>> factor_with_rep]
+| [term `<: factor_with_rep]
+| [factor_with_rep]
+
+def factor_with_rep
+ [factor_with_rep `*]
+| [factor_with_rep `**]
+| [factor_with_rep `?]
+| [factor_with_rep `+]
+| [factor_with_rep `{ factor_rep_num `}]
+| [factor_with_rep `{ `, factor_rep_num `}]
+| [factor_with_rep `{ factor_rep_num `, `}]
+| [factor_with_rep `{ factor_rep_num `, factor_rep_num `}]
+| [factor_with_neg]
+
+def factor_rep_num [uint]
+
+def factor_with_neg
+ [`! factor_with_neg]
+| [`^ factor_with_neg]
+| [factor]
+
+def factor
+ [alphabet_num]
+| [word]
+| [`( expression `)]
+
+def alphabet_num
+ [uint]
+| [`- uint]
+
+parse start[ stdin ]
+print( '\n' )
+##### IN #####
+1 - 1
+##### EXP #####
+<start><expression><expression><term_short><term><factor_with_rep><factor_with_neg><factor><alphabet_num><uint>1</uint></alphabet_num></factor></factor_with_neg></factor_with_rep></term></term_short></expression><_literal_0007>-</_literal_0007><term_short><term><factor_with_rep><factor_with_neg><factor><alphabet_num><uint>1</uint></alphabet_num></factor></factor_with_neg></factor_with_rep></term></term_short></expression></start>
diff --git a/test/ragelambig4.lm b/test/ragelambig4.lm
new file mode 100644
index 0000000..e841b80
--- /dev/null
+++ b/test/ragelambig4.lm
@@ -0,0 +1,76 @@
+##### LM #####
+lex
+ ignore /[\t\n ]+/
+ literal `^ `| `- `, `: `! `? `.
+ literal `( `) `{ `} `* `& `+
+
+ literal `-- `:> `:>> `<: `-> `**
+
+ token word /[a-zA-Z_][a-zA-Z0-9_]*/
+ token uint /[0-9]+/
+end
+
+
+def start
+ [expression]
+ {
+ print_xml( lhs )
+ }
+
+def expression [term expression_op*]
+
+def expression_op
+ [`| term]
+| [`& term]
+| [`- term]
+| [`-- term]
+
+def term [factor_rep term_op_list_short]
+
+# This list is done manually to get shortest match.
+def term_op_list_short
+ []
+| [term_op term_op_list_short]
+
+def term_op
+ [factor_rep]
+| [`. factor_rep]
+| [`:> factor_rep]
+| [`:>> factor_rep]
+| [`<: factor_rep]
+
+def factor_rep
+ [factor_neg factor_rep_op*]
+
+def factor_rep_op
+ [`*]
+| [`**]
+| [`?]
+| [`+]
+| [`{ factor_rep_num `}]
+| [`{ `, factor_rep_num `}]
+| [`{ factor_rep_num `, `}]
+| [`{ factor_rep_num `, factor_rep_num `}]
+
+def factor_rep_num [uint]
+
+def factor_neg
+ [`! factor_neg]
+| [`^ factor_neg]
+| [factor]
+
+def factor
+ [alphabet_num]
+| [word]
+| [`( expression `)]
+
+def alphabet_num
+ [uint]
+| [`- uint]
+
+parse start[ stdin ]
+print( '\n' )
+##### IN #####
+1 - 1
+##### EXP #####
+<start><expression><term><factor_rep><factor_neg><factor><alphabet_num><uint>1</uint></alphabet_num></factor></factor_neg><_repeat_factor_rep_op></_repeat_factor_rep_op></factor_rep><term_op_list_short></term_op_list_short></term><_repeat_expression_op><expression_op><_literal_0007>-</_literal_0007><term><factor_rep><factor_neg><factor><alphabet_num><uint>1</uint></alphabet_num></factor></factor_neg><_repeat_factor_rep_op></_repeat_factor_rep_op></factor_rep><term_op_list_short></term_op_list_short></term></expression_op></_repeat_expression_op></expression></start>
diff --git a/test/rediv.lm b/test/rediv.lm
new file mode 100644
index 0000000..c5ac955
--- /dev/null
+++ b/test/rediv.lm
@@ -0,0 +1,99 @@
+##### LM #####
+# Or-literal scanner
+lex
+ token orlit_dash /'-' /
+ token orlit_close /']'/
+
+ rl orlit_specials /[\-\]]/
+ token orlit_chr /^orlit_specials | '\\' any/
+end
+
+def orlit_item
+ [orlit_chr]
+| [orlit_chr orlit_dash orlit_chr]
+
+def orlit
+ [orlit_item*]
+
+# Regex scanner
+lex
+ token orlit_open /'['/
+ token orlit_neg_open /'[^'/
+ token regex_dot /'.'/
+ token regex_star /'*'/
+ token regex_close /'/'/
+
+ rl regex_specials /[\[\.\*\/\\]/
+ token regex_chr /(^regex_specials)+ | '\\' any/
+end
+
+def regex_rep
+ [regex_star]
+| []
+
+def regex_base
+ [regex_chr]
+| [regex_dot]
+| [orlit_open orlit orlit_close]
+| [orlit_neg_open orlit orlit_close]
+
+def regex_item
+ [regex_base regex_rep]
+
+def regex_body
+ [regex_item*]
+
+rl s_string /"'" ([^'\\\n] | '\\' any )* "'"/
+rl d_string /'"' ([^"\\\n] | '\\' any )* '"'/
+
+# Root scanner
+lex
+ token ident /[a-zA-Z_]+/
+ token number /[0-9]+/
+ token string /s_string | d_string/
+
+ literal `+ `- `* `; `/
+ token slash /'/'/
+ token semi /';'/
+
+ ignore wp /[ \t\n]+/
+end
+
+def factor
+ [ident]
+| [number]
+| [string]
+| [`/ regex_body regex_close]
+
+def term
+ [term `* factor]
+| [term `/ factor]
+| [factor]
+
+def expr
+ [expr `+ term]
+| [expr `- term]
+| [term]
+
+def statement
+ [expr `;]
+
+def start
+ [statement*]
+
+parse S: start[ stdin ]
+
+for I:orlit_item in S {
+ if match I [orlit_chr] {
+ print( I '\n' )
+ }
+}
+print_xml( S )
+print( '\n' )
+##### IN #####
+2 / /[^gu-zy].*o[\d-xa]*/;
+##### EXP #####
+g
+y
+a
+<start><_repeat_statement><statement><expr><term><term><factor><number>2</number></factor></term><_literal_0021>/</_literal_0021><factor><_literal_0021>/</_literal_0021><regex_body><_repeat_regex_item><regex_item><regex_base><orlit_neg_open>[^</orlit_neg_open><orlit><_repeat_orlit_item><orlit_item><orlit_chr>g</orlit_chr></orlit_item><orlit_item><orlit_chr>u</orlit_chr><orlit_dash>-</orlit_dash><orlit_chr>z</orlit_chr></orlit_item><orlit_item><orlit_chr>y</orlit_chr></orlit_item></_repeat_orlit_item></orlit><orlit_close>]</orlit_close></regex_base><regex_rep></regex_rep></regex_item><regex_item><regex_base><regex_dot>.</regex_dot></regex_base><regex_rep><regex_star>*</regex_star></regex_rep></regex_item><regex_item><regex_base><regex_chr>o</regex_chr></regex_base><regex_rep></regex_rep></regex_item><regex_item><regex_base><orlit_open>[</orlit_open><orlit><_repeat_orlit_item><orlit_item><orlit_chr>\d</orlit_chr><orlit_dash>-</orlit_dash><orlit_chr>x</orlit_chr></orlit_item><orlit_item><orlit_chr>a</orlit_chr></orlit_item></_repeat_orlit_item></orlit><orlit_close>]</orlit_close></regex_base><regex_rep><regex_star>*</regex_star></regex_rep></regex_item></_repeat_regex_item></regex_body><regex_close>/</regex_close></factor></term></expr><_literal_001f>;</_literal_001f></statement></_repeat_statement></start>
diff --git a/test/reor1.lm b/test/reor1.lm
new file mode 100644
index 0000000..816b2f1
--- /dev/null
+++ b/test/reor1.lm
@@ -0,0 +1,27 @@
+##### LM #####
+lex
+ token id / [abcdef] /
+ token number / [0-9] /
+ ignore / [\n\t ] /
+end
+
+def item [id] | [number]
+
+def start [item*]
+
+parse P: start[stdin]
+print( P )
+##### IN #####
+ab cd ef
+##### EXP #####
+ab cd ef
+##### IN #####
+ag
+##### EXP #####
+NIL--noeol
+##### IN #####
+93
+ab 22
+##### EXP #####
+93
+ab 22
diff --git a/test/reor2.lm b/test/reor2.lm
new file mode 100644
index 0000000..51f0dd3
--- /dev/null
+++ b/test/reor2.lm
@@ -0,0 +1,24 @@
+##### LM #####
+context undo
+
+ lex
+ ignore /[ ]+/
+ literal `;
+ token NL /'\n'/
+ token id /[a-zA-Z_]+/
+ end
+
+ def item
+ [id]
+
+ def start
+ [item* `; NL]
+end
+
+cons Undo: undo[]
+parse Input: undo::start( Undo )[ stdin ]
+print( Input )
+##### IN #####
+a b;
+##### EXP #####
+a b;
diff --git a/test/reparse.lm b/test/reparse.lm
new file mode 100644
index 0000000..907ca6a
--- /dev/null
+++ b/test/reparse.lm
@@ -0,0 +1,26 @@
+##### LM #####
+lex
+ ignore /space+/
+ literal `* `( `)
+ token id /[a-zA-Z_]+/
+end
+
+def item
+ [id]
+| [`( item* `)]
+
+def start
+ [item*]
+
+parse Input: item*[ stdin ]
+
+S: start = cons start[ Input ]
+
+parse Again: start[ %Input ]
+
+print( Again )
+
+##### IN #####
+a b c ( chocolate fudge ) d e
+##### EXP #####
+a b c ( chocolate fudge ) d e
diff --git a/test/repeat1.lm b/test/repeat1.lm
new file mode 100644
index 0000000..315a63e
--- /dev/null
+++ b/test/repeat1.lm
@@ -0,0 +1,42 @@
+##### LM #####
+lex
+ ignore /space+/
+ literal `* `( `)
+ token id /[a-zA-Z_]+/
+end
+
+def item
+ [id]
+| [`( item* `)]
+
+def start
+ [item*]
+
+parse Input: start[ stdin ]
+
+match Input [ItemList: item*]
+
+for I: item* in repeat( ItemList )
+ print( ^I '\n' )
+
+for I: item* in rev_repeat( ItemList )
+ print( ^I '\n' )
+##### IN #####
+a b ( c d ) e ( f g ) h i
+##### EXP #####
+a b ( c d ) e ( f g ) h i
+b ( c d ) e ( f g ) h i
+( c d ) e ( f g ) h i
+e ( f g ) h i
+( f g ) h i
+h i
+i
+
+
+i
+h i
+( f g ) h i
+e ( f g ) h i
+( c d ) e ( f g ) h i
+b ( c d ) e ( f g ) h i
+a b ( c d ) e ( f g ) h i
diff --git a/test/repeat2.lm b/test/repeat2.lm
new file mode 100644
index 0000000..e001d8f
--- /dev/null
+++ b/test/repeat2.lm
@@ -0,0 +1,7408 @@
+##### LM #####
+#
+# Copyright 2012 Adrian Thurston <thurston@complang.org>
+#
+
+# This file is part of Ragel.
+#
+# Ragel is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Ragel is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ragel; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+lex
+ token word /( [^. \t\n]+ | '.' )/
+ token lws /[ \t]+/
+ token nl / '\n'/
+
+ token cmd_verb1 /'.verb|'/
+ token cmd_verb2 /'.verb/'/
+ token cmd_label /'.label{'/
+ token cmd_ref /'.ref{'/
+ token cmd_em /'.em{'/
+ token cmd_tt /'.tt{'/
+
+ token cmd_title /'.title' lws/
+ token cmd_sub_title /'.subtitle' lws/
+ token cmd_author /'.author' lws/
+
+ token cmd_chapter /'.chapter' lws/
+ token cmd_section /'.section' lws/
+ token cmd_sub_section /'.subsection' lws/
+ token cmd_sub_sub_section /'.subsubsection' lws/
+
+ token cmd_graphic /'.graphic' lws/
+ token cmd_comment /'.comment' lws? '\n'/
+ token cmd_verbatim /'.verbatim' lws? '\n'/
+ token cmd_code /'.code' lws? '\n'/
+
+ token cmd_itemize /'.itemize' lws? '\n'/
+ token end_itemize /'.end' lws 'itemize' lws? '\n'/
+ token cmd_item /'.item' lws/
+
+ token cmd_center /'.center' lws? '\n'/
+ token end_center /'.end' lws 'center' lws? '\n'/
+
+ token cmd_tabular /'.tabular' lws? '\n'/
+ token cmd_row /'.row' lws/
+ token end_tabular /'.end' lws 'tabular' lws? '\n'/
+
+ token cmd_multicols /'.multicols' lws? '\n'/
+ token cmd_columnbreak /'.columnbreak' lws? '\n'/
+ token end_multicols /'.end' lws 'multicols' lws? '\n'/
+
+ token cmd_figure / '.figure' lws?/
+ token cmd_caption / '.caption' lws/
+ token end_figure / '.end' lws 'figure' lws? '\n'/
+
+ token cmd_list /'.list' lws? '\n'/
+ token end_list /'.end' lws 'list' lws? '\n'/
+ token cmd_li /'.li' lws/
+
+ token cmd_license /'.license' lws? '\n'/
+end
+
+lex
+ token bar_data /[^|]*/
+ token end_bar /'|'/
+end
+
+lex
+ token slash_data /[^/]*/
+ token end_slash /'/'/
+end
+
+lex
+ token curly_data /[^}]*/
+ token end_curly /'}'/
+end
+
+def cmd_il
+ [cmd_verb1 bar_data end_bar]
+| [cmd_verb2 slash_data end_slash]
+| [cmd_label curly_data end_curly]
+| [cmd_ref curly_data end_curly]
+| [cmd_em curly_data end_curly]
+| [cmd_tt curly_data end_curly]
+
+def text
+ [word]
+| [lws]
+| [cmd_il]
+
+lex
+ token end_verbatim /lws? '.' lws? 'end' lws 'verbatim' lws? '\n'/
+ token verbatim_line /[^\n]* '\n'/
+end
+
+def verbatim
+ [cmd_verbatim verbatim_line* end_verbatim]
+
+lex
+ token end_code /lws? '.' lws? 'end' lws 'code' lws? '\n'/
+ token code_line /[^\n]* '\n'/
+end
+
+def code
+ [cmd_code code_line* end_code]
+
+lex
+ token end_comment /lws? '.' lws? 'end' lws 'comment' lws? '\n'/
+ token comment_line /[^\n]* '\n'/
+end
+
+def comment
+ [cmd_comment comment_line* end_comment]
+
+def figure
+ [cmd_figure text nl line* caption? end_figure]
+
+def li
+ [cmd_li text* nl]
+
+def _list
+ [cmd_list li* end_list]
+
+def scale
+ [lws word word*]
+
+def graphic
+ [cmd_graphic word scale? nl]
+
+def itemize
+ [cmd_itemize line* item* end_itemize]
+
+def center
+ [cmd_center line* end_center]
+
+def row
+ [cmd_row text* nl]
+
+def tabular
+ [cmd_tabular row* end_tabular]
+
+def multicols_line
+ [cmd_columnbreak]
+| [line]
+
+def multicols
+ [cmd_multicols multicols_line* end_multicols]
+
+def item
+ [cmd_item line*]
+
+def caption
+ [cmd_caption line*]
+
+def line
+ [text]
+| [nl]
+| [comment]
+| [verbatim]
+| [code]
+| [graphic]
+| [itemize]
+| [center]
+| [tabular]
+| [multicols]
+| [figure]
+| [_list]
+
+def sub_sub_section
+ [cmd_sub_sub_section text* nl line*]
+
+def sub_section
+ [cmd_sub_section text* nl line* sub_sub_section*]
+
+def section
+ [cmd_section text* nl line* sub_section*]
+
+def chapter
+ [cmd_chapter text* nl line* section*]
+
+def title
+ [cmd_title text* nl]
+
+def subtitle
+ [cmd_sub_title text* nl]
+
+def author
+ [cmd_author text* nl]
+
+#
+# Paragraphs.
+#
+
+def pline
+ [text text* nl]
+
+def paragraph
+ [pline pline*]
+
+def pextra
+ [nl paragraph]
+
+def block
+ [paragraph pextra*]
+
+def license
+ [cmd_license nl* block nl*]
+
+#
+# Preamble.
+#
+
+def preamble_item
+ [text]
+| [nl]
+| [title]
+| [subtitle]
+| [author]
+
+def preamble
+ [preamble_item* license]
+
+def start
+ [preamble chapter*]
+
+parse Start: start[ stdin ]
+if ( ! Start ) {
+ print( error '\n' )
+ exit( 1 )
+}
+
+int printPlData( Pld: cmd_il )
+{
+ if match Pld [ cmd_verb1 V: bar_data end_bar] {
+ print( '\\verb|' )
+ print( V )
+ print( '|' )
+ }
+ else if match Pld [cmd_verb2 V: slash_data end_slash] {
+ print( '\\verb/' )
+ print( V )
+ print( '/' )
+ }
+ else if match Pld [cmd_label L: curly_data end_curly] {
+ print( '\\label{' )
+ print( L )
+ print( '}' )
+ }
+ else if match Pld [cmd_ref L: curly_data end_curly] {
+ print( '\\ref{' )
+ print( L )
+ print( '}' )
+ }
+ else if match Pld [cmd_em L: curly_data end_curly] {
+ print( '{\\em ' )
+ print( L )
+ print( '}' )
+ }
+ else if match Pld [cmd_tt L: curly_data end_curly] {
+ print( '{\\tt ' )
+ print( L )
+ print( '}' )
+ }
+ else {
+ print( Pld )
+ }
+}
+
+int printText( Lines: text* )
+{
+ for L: text in repeat(Lines) {
+ if match L [PlData: cmd_il] {
+ printPlData( PlData )
+ }
+ else {
+ print( L )
+ }
+ }
+}
+
+int printLines( Lines: line* )
+{
+ for L: line in repeat(Lines) {
+ if match L [word] {
+ print( L )
+ }
+ if match L [lws] {
+ print( L )
+ }
+ if match L [nl] {
+ print( L )
+ }
+ if match L [PlData: cmd_il] {
+ printPlData( PlData )
+ }
+ if match L [cmd_verbatim Lines: verbatim_line* end_verbatim] {
+ print( '\\begin{verbatim}\n' )
+ print( Lines )
+ print( '\\end{verbatim}\n' )
+ print( '\\verbspace\n' )
+ }
+ if match L [cmd_code Lines: code_line* end_code] {
+ print( '\\begin{inline_code}\n' )
+ print( '\\begin{verbatim}\n' )
+ print( Lines )
+ print( '\\end{verbatim}\n' )
+ print( '\\end{inline_code}\n' )
+ print( '\\verbspace\n' )
+ }
+ if match L [cmd_graphic Name: word Scale: scale? nl] {
+ print( '\\graphspace\n' )
+ print( '\\begin{center}\n' )
+ print( '\\includegraphics' )
+ if match Scale [lws Spd: word Spd2: word*]
+ print( '[scale=' Spd Spd2 ']' )
+ else
+ print( '[scale=0.55]' )
+ print( '{' Name '}\n' )
+ print( '\\end{center}\n' )
+ print( '\\graphspace\n' )
+ }
+ if match L [cmd_itemize Lines: line* Items: item* end_itemize] {
+ print( '\\begin{itemize}\n' )
+ printLines( Lines )
+ for Item: item in repeat(Items) {
+ match Item [cmd_item Lines: line*]
+ print( '\\item ' )
+ printLines( Lines )
+ }
+ print( '\\end{itemize}\n' )
+ }
+ if match L [cmd_figure DirData: text nl Lines: line* Caption: caption? end_figure] {
+ print( '\\begin{figure}\n' )
+ print( '\\small\n' )
+ printLines( Lines )
+ if match Caption [cmd_caption CL: line*] {
+ print( '\\caption{' )
+ printLines( CL )
+ print( '}\n' )
+ }
+ print( '\\label{' DirData '}\n' )
+ print( '\\end{figure}\n' )
+ }
+ if match L [cmd_list LiList: li* end_list] {
+ for Li: li* in LiList {
+ if match Li [cmd_li Lines: text* nl Rest: li*] {
+ print( '\\noindent\\\hspace*{24pt}' )
+ printText( Lines )
+ if match Rest [ li li* ]
+ print( '\\\\' )
+ print( '\n' )
+ }
+ }
+ print( '\\vspace{12pt}\n' )
+ }
+ if match L [cmd_center Lines: line* end_center] {
+ print( '\\begin{center}\n' )
+ printLines( Lines )
+ print( '\\end{center}\n' )
+ }
+ if match L [cmd_tabular Rows: row* end_tabular] {
+ print( '\\begin{tabular}{|c|c|c|}\n' )
+ print( '\\hline\n' )
+ for Row: row in repeat(Rows) {
+ if match Row [cmd_row Lines: text* nl ] {
+ printText( Lines )
+ print( '\\\\' '\n' )
+ print( '\\hline\n' )
+ }
+ }
+ print( '\\end{tabular}\n' )
+ }
+ if match L [cmd_multicols Lines: multicols_line* end_multicols] {
+ print( '\\begin{multicols}{2}\n' )
+ for McLine: multicols_line in repeat( Lines ) {
+ if match McLine [Line: line]
+ printLines( cons line* [Line] )
+ else if match McLine [cmd_columnbreak] {
+ print( '\\columnbreak\n' )
+ }
+ }
+ print( '\\end{multicols}\n' )
+ }
+ }
+}
+
+match Start
+ [Preamble: preamble Chapters: chapter*]
+
+Title: title = title in Preamble
+match Title [cmd_title TitleData: text* nl]
+
+SubTitle: subtitle = subtitle in Preamble
+match SubTitle [cmd_sub_title SubTitleData: text* nl]
+
+Author: author = author in Preamble
+match Author [cmd_author AuthorData: text* nl]
+
+License: license = license in Preamble
+
+print(
+ ~\documentclass[letterpaper,11pt,oneside]{book}
+ ~\usepackage{graphicx}
+ ~\usepackage{comment}
+ ~\usepackage{multicol}
+ ~\usepackage[
+ ~ colorlinks=true,
+ ~ linkcolor=black,
+ ~ citecolor=green,
+ ~ filecolor=black,
+ ~ urlcolor=black]{hyperref}
+ ~
+ ~\topmargin -0.20in
+ ~\oddsidemargin 0in
+ ~\textwidth 6.5in
+ ~\textheight 9in
+ ~
+ ~\setlength{\parskip}{0pt}
+ ~\setlength{\topsep}{0pt}
+ ~\setlength{\partopsep}{0pt}
+ ~\setlength{\itemsep}{0pt}
+ ~
+ ~\input{version}
+ ~
+ ~\newcommand{\verbspace}{\vspace{10pt}}
+ ~\newcommand{\graphspace}{\vspace{10pt}}
+ ~
+ ~\renewcommand\floatpagefraction{.99}
+ ~\renewcommand\topfraction{.99}
+ ~\renewcommand\bottomfraction{.99}
+ ~\renewcommand\textfraction{.01}
+ ~\setcounter{totalnumber}{50}
+ ~\setcounter{topnumber}{50}
+ ~\setcounter{bottomnumber}{50}
+ ~
+ ~\newenvironment{inline_code}{\def\baselinestretch{1}\vspace{12pt}\small}{}
+ ~
+ ~\begin{document}
+ ~
+ ~\thispagestyle{empty}
+ ~\begin{center}
+ ~\vspace*{3in}
+)
+
+print( '{\\huge ' TitleData '}\\\\\n' )
+
+print( '\\vspace*{12pt}\n' )
+
+print( '{\\Large ' SubTitleData '}\\\\\n' )
+
+print(
+ ~\vspace{1in}
+ ~by\\
+ ~\vspace{12pt}
+)
+
+print( '{\\large ' AuthorData '}\\\\\n' )
+
+print(
+ ~\end{center}
+ ~\clearpage
+ ~
+ ~\pagenumbering{roman}
+ ~
+ ~\chapter*{License}
+)
+
+print(
+ ~Ragel version \version, \pubdate\\
+ ~Copyright \copyright\ 2003-2012 Adrian D. Thurston
+ ~\vspace{6mm}
+ ~
+)
+
+i: int = 0
+for P: paragraph in License {
+ if ( i != 0 ) {
+ print(
+ ~
+ ~\vspace{5pt}
+ ~
+ )
+ }
+ print( "{\\bf\\it\\noindent " )
+ print( P )
+ print( "}\n" )
+ i = i + 1
+}
+
+print(
+ ~
+ ~\clearpage
+ ~\tableofcontents
+ ~\clearpage
+ ~
+ ~\pagenumbering{arabic}
+)
+
+
+for Chapter: chapter in repeat(Chapters) {
+ match Chapter
+ [cmd_chapter DirData: text* nl Lines: line* SectionList: section*]
+
+ print( '\\chapter{' DirData '}\n' )
+ printLines( Lines )
+
+ for Section: section in repeat(SectionList) {
+ match Section
+ [cmd_section DirData: text* nl Lines: line* SubSectionList: sub_section*]
+
+ print( '\\section{' DirData '}\n' )
+ printLines( Lines )
+ for SubSection: sub_section in repeat(SubSectionList) {
+ match SubSection
+ [cmd_sub_section DirData: text* nl Lines: line*
+ SubSubSectionList: sub_sub_section*]
+
+ print( '\\subsection{' DirData '}\n' )
+ printLines( Lines )
+
+ for SubSubSection: sub_sub_section in repeat(SubSubSectionList) {
+ match SubSubSection
+ [cmd_sub_sub_section DirData: text* nl Lines: line*]
+
+ print( '\\subsubsection{' DirData '}\n' )
+ printLines( Lines )
+ }
+ }
+ }
+}
+
+print(
+ ~
+ ~\end{document}
+)
+##### IN #####
+.title Ragel State Machine Compiler
+
+.subtitle User Guide
+
+.author Adrian Thurston
+
+.license
+
+This document is part of Ragel, and as such, this document is
+released under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+
+Ragel is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public
+License along with Ragel; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+.chapter Introduction
+
+.section Abstract
+
+Regular expressions are used heavily in practice for the purpose of specifying
+parsers. They are normally used as black boxes linked together with program
+logic. User actions are executed in between invocations of the regular
+expression engine. Adding actions before a pattern terminates requires patterns
+to be broken and pasted back together with program logic. The more user actions
+are needed, the less the advantages of regular expressions are seen.
+
+Ragel is a software development tool that allows user actions to be
+embedded into the transitions of a regular expression's corresponding state
+machine, eliminating the need to switch from the regular expression engine and
+user code execution environment and back again. As a result, expressions can be
+maximally continuous. One is free to specify an entire parser using a single
+regular expression. The single-expression model affords concise and elegant
+descriptions of languages and the generation of very simple, fast and robust
+code. Ragel compiles executable finite state machines from a high level regular language
+notation. Ragel targets C, C++, Objective-C, D, Go, Java and Ruby.
+
+In addition to building state machines from regular expressions, Ragel allows
+the programmer to directly specify state machines with state charts. These two
+notations may be freely combined. There are also facilities for controlling
+nondeterminism in the resulting machines and building scanners using patterns
+that themselves have embedded actions. Ragel can produce code that is small and
+runs very fast. Ragel can handle integer-sized alphabets and can compile very
+large state machines.
+
+.section Motivation
+
+When a programmer is faced with the task of producing a parser for a
+context-free language there are many tools to choose from. It is quite common
+to generate useful and efficient parsers for programming languages from a
+formal grammar. It is also quite common for programmers to avoid such tools
+when making parsers for simple computer languages, such as file formats and
+communication protocols. Such languages are often regular and tools for
+processing the context-free languages are viewed as too heavyweight for the
+purpose of parsing regular languages. The extra run-time effort required for
+supporting the recursive nature of context-free languages is wasted.
+
+When we turn to the regular expression-based parsing tools, such as Lex, Re2C,
+and scripting languages such as Sed, Awk and Perl we find that they are split
+into two levels: a regular expression matching engine and some kind of program
+logic for linking patterns together. For example, a Lex program is composed of
+sets of regular expressions. The implied program logic repeatedly attempts to
+match a pattern in the current set. When a match is found the associated user
+code executed. It requires the user to consider a language as a sequence of
+independent tokens. Scripting languages and regular expression libraries allow
+one to link patterns together using arbitrary program code. This is very
+flexible and powerful, however we can be more concise and clear if we avoid
+gluing together regular expressions with if statements and while loops.
+
+This model of execution, where the runtime alternates between regular
+expression matching and user code exectution places restrictions on when
+action code may be executed. Since action code can only be associated with
+complete patterns, any action code that must be executed before an entire
+pattern is matched requires that the pattern be broken into smaller units.
+Instead of being forced to disrupt the regular expression syntax and write
+smaller expressions, it is desirable to retain a single expression and embed
+code for performing actions directly into the transitions that move over the
+characters. After all, capable programmers are astutely aware of the machinery
+underlying their programs, so why not provide them with access to that
+machinery? To achieve this we require an action execution model for associating
+code with the sub-expressions of a regular expression in a way that does not
+disrupt its syntax.
+
+The primary goal of Ragel is to provide developers with an ability to embed
+actions into the transitions and states of a regular expression's state machine
+in support of the definition of entire parsers or large sections of parsers
+using a single regular expression. From the regular expression we gain a clear
+and concise statement of our language. From the state machine we obtain a very
+fast and robust executable that lends itself to many kinds of analysis and
+visualization.
+
+.section Overview
+
+Ragel is a language for specifying state machines. The Ragel program is a
+compiler that assembles a state machine definition to executable code. Ragel
+is based on the principle that any regular language can be converted to a
+deterministic finite state automaton. Since every regular language has a state
+machine representation and vice versa, the terms regular language and state
+machine (or just machine) will be used interchangeably in this document.
+
+Ragel outputs machines to C, C++, Objective-C, D, Go, Java or Ruby code. The output is
+designed to be generic and is not bound to any particular input or processing
+method. A Ragel machine expects to have data passed to it in buffer blocks.
+When there is no more input, the machine can be queried for acceptance. In
+this way, a Ragel machine can be used to simply recognize a regular language
+like a regular expression library. By embedding code into the regular language,
+a Ragel machine can also be used to parse input.
+
+The Ragel language has many operators for constructing and manipulating
+machines. Machines are built up from smaller machines, to bigger ones, to the
+final machine representing the language that needs to be recognized or parsed.
+
+The core state machine construction operators are those found in most theory
+of computation textbooks. They date back to the 1950s and are widely studied.
+They are based on set operations and permit one to think of languages as a set
+of strings. They are Union, Intersection, Difference, Concatenation and Kleene
+Star. Put together, these operators make up what most people know as regular
+expressions. Ragel also provides a scanner construction operator
+and provides operators for explicitly constructing machines
+using a state chart method. In the state chart method, one joins machines
+together without any implied transitions and then explicitly specifies where
+epsilon transitions should be drawn.
+
+The state machine manipulation operators are specific to Ragel. They allow the
+programmer to access the states and transitions of regular language's
+corresponding machine. There are two uses of the manipulation operators. The
+first and primary use is to embed code into transitions and states, allowing
+the programmer to specify the actions of the state machine.
+
+Ragel attempts to make the action embedding facility as intuitive as possible.
+To do so, a number of issues need to be addressed. For example, when making a
+nondeterministic specification into a DFA using machines that have embedded
+actions, new transitions are often made that have the combined actions of
+several source transitions. Ragel ensures that multiple actions associated with
+a single transition are ordered consistently with respect to the order of
+reference and the natural ordering implied by the construction operators.
+
+The second use of the manipulation operators is to assign priorities to
+transitions. Priorities provide a convenient way of controlling any
+nondeterminism introduced by the construction operators. Suppose two
+transitions leave from the same state and go to distinct target states on the
+same character. If these transitions are assigned conflicting priorities, then
+during the determinization process the transition with the higher priority will
+take precedence over the transition with the lower priority. The lower priority
+transition gets abandoned. The transitions would otherwise be combined into a new
+transition that goes to a new state that is a combination of the original
+target states. Priorities are often required for segmenting machines. The most
+common uses of priorities have been encoded into a set of simple operators
+that should be used instead of priority embeddings whenever possible.
+
+For the purposes of embedding, Ragel divides transitions and states into
+different classes. There are four operators for embedding actions and
+priorities into the transitions of a state machine. It is possible to embed
+into entering transitions, finishing transitions, all transitions and leaving
+transitions. The embedding into leaving transitions is a special case.
+These transition embeddings get stored in the final states of a machine. They
+are transferred to any transitions that are made going out of the machine by
+future concatenation or kleene star operations.
+
+There are several more operators for embedding actions into states. Like the
+transition embeddings, there are various different classes of states that the
+embedding operators access. For example, one can access start states, final
+states or all states, among others. Unlike the transition embeddings, there are
+several different types of state action embeddings. These are executed at
+various different times during the processing of input. It is possible to embed
+actions that are exectued on transitions into a state, on transitions out of a
+state, on transitions taken on the error event, or on transitions taken on the
+EOF event.
+
+Within actions, it is possible to influence the behaviour of the state machine.
+The user can write action code that jumps or calls to another portion of the
+machine, changes the current character being processed, or breaks out of the
+processing loop. With the state machine calling feature Ragel can be used to
+parse languages that are not regular. For example, one can parse balanced
+parentheses by calling into a parser when an open parenthesis character is seen
+and returning to the state on the top of the stack when the corresponding
+closing parenthesis character is seen. More complicated context-free languages
+such as expressions in C are out of the scope of Ragel.
+
+Ragel also provides a scanner construction operator that can be used to build
+scanners much the same way that Lex is used. The Ragel generated code, which
+relies on user-defined variables for backtracking, repeatedly tries to match
+patterns to the input, favouring longer patterns over shorter ones and patterns
+that appear ahead of others when the lengths of the possible matches are
+identical. When a pattern is matched the associated action is executed.
+
+The key distinguishing feature between scanners in Ragel and scanners in Lex is
+that Ragel patterns may be arbitrary Ragel expressions and can therefore
+contain embedded code. With a Ragel-based scanner the user need not wait until
+the end of a pattern before user code can be executed.
+
+Scanners do take Ragel out of the domain of pure state machines and require the
+user to maintain the backtracking related variables. However, scanners
+integrate well with regular state machine instantiations. They can be called to
+or jumped to only when needed, or they can be called out of or jumped out of
+when a simpler, pure state machine model is appropriate.
+
+Two types of output code style are available. Ragel can produce a table-driven
+machine or a directly executable machine. The directly executable machine is
+much faster than the table-driven. On the other hand, the table-driven machine
+is more compact and less demanding on the host language compiler. It is better
+suited to compiling large state machines.
+
+.section Related Work
+
+Lex is perhaps the best-known tool for constructing parsers from regular
+expressions. In the Lex processing model, generated code attempts to match one
+of the user's regular expression patterns, favouring longer matches over
+shorter ones. Once a match is made it then executes the code associated with
+the pattern and consumes the matching string. This process is repeated until
+the input is fully consumed.
+
+Through the use of start conditions, related sets of patterns may be defined.
+The active set may be changed at any time. This allows the user to define
+different lexical regions. It also allows the user to link patterns together by
+requiring that some patterns come before others. This is quite like a
+concatenation operation. However, use of Lex for languages that require a
+considerable amount of pattern concatenation is inappropriate. In such cases a
+Lex program deteriorates into a manually specified state machine, where start
+conditions define the states and pattern actions define the transitions. Lex
+is therefore best suited to parsing tasks where the language to be parsed can
+be described in terms of regions of tokens.
+
+Lex is useful in many scenarios and has undoubtedly stood the test of time.
+There are, however, several drawbacks to using Lex. Lex can impose too much
+overhead for parsing applications where buffering is not required because all
+the characters are available in a single string. In these cases there is
+structure to the language to be parsed and a parser specification tool can
+help, but employing a heavyweight processing loop that imposes a stream
+``pull'' model and dynamic input buffer allocation is inappropriate. An
+example of this kind of scenario is the conversion of floating point numbers
+contained in a string to their corresponding numerical values.
+
+Another drawback is the very issue that Ragel attempts to solve.
+It is not possible to execute a user action while
+matching a character contained inside a pattern. For example, if scanning a
+programming language and string literals can contain newlines which must be
+counted, a Lex user must break up a string literal pattern so as to associate
+an action with newlines. This forces the definition of a new start condition.
+Alternatively the user can reprocess the text of the matched string literal to
+count newlines.
+
+.comment
+
+How ragel is different from Lex.
+
+Like Re2c, Ragel provides a simple execution model that does not make any
+assumptions as to how the input is collected. Also, Ragel does not do any
+buffering in the generated code. Consequently there are no dependencies on
+external functions such as .verb|malloc|.
+
+If buffering is required it can be manually implemented by embedding actions
+that copy the current character to a buffer, or data can be passed to the
+parser using known block boundaries. If the longest-match operator is used,
+Ragel requires the user to ensure that the ending portion of the input buffer
+is preserved when the buffer is exhausted before a token is fully matched. The
+user should move the token prefix to a new memory location, such as back to the
+beginning of the input buffer, then place the subsequently read input
+immediately after the prefix.
+
+These properties of Ragel make it more work to write a program that requires
+the longest-match operator or buffering of input, however they make Ragel a
+more flexible tool that can produce very simple and fast-running programs under
+a variety of input acquisition arrangements.
+
+In Ragel, it is not necessary
+to introduce start conditions to concatenate tokens and retain action
+execution. Ragel allows one to structure a parser as a series of tokens, but
+does not require it.
+
+Like Lex and Re2C, Ragel is able to process input using a longest-match
+execution model, however the core of the Ragel language specifies parsers at a
+much lower level. This core is built around a pure state machine model. When
+building basic machines there is no implied algorithm for processing input
+other than to move from state to state on the transitions of the machine. This
+core of pure state machine operations makes Ragel well suited to handling
+parsing problems not based on token scanning. Should one need to use a
+longest-match model, the functionality is available and the lower level state
+machine construction facilities can be used to specify the patterns of a
+longest-match machine.
+
+This is not possible in Ragel. One can only program
+a longest-match instantiation with a fixed set of rules. One can jump to
+another longest-match machine that employs the same machine definitions in the
+construction of its rules, however no states will be shared.
+
+In Ragel, input may be re-parsed using a
+different machine, but since the action to be executed is associated with
+transitions of the compiled state machine, the longest-match construction does
+not permit a single rule to be excluded from the active set. It cannot be done
+ahead of time nor in the excluded rule's action.
+
+.end comment
+
+The Re2C program defines an input processing model similar to that of Lex.
+Re2C focuses on making generated state machines run very fast and
+integrate easily into any program, free of dependencies. Re2C generates
+directly executable code and is able to claim that generated parsers run nearly
+as fast as their hand-coded equivalents. This is very important for user
+adoption, as programmers are reluctant to use a tool when a faster alternative
+exists. A consideration to ease of use is also important because developers
+need the freedom to integrate the generated code as they see fit.
+
+Many scripting languages provide ways of composing parsers by linking regular
+expressions using program logic. For example, Sed and Awk are two established
+Unix scripting tools that allow the programmer to exploit regular expressions
+for the purpose of locating and extracting text of interest. High-level
+programming languages such as Perl, Python, PHP and Ruby all provide regular
+expression libraries that allow the user to combine regular expressions with
+arbitrary code.
+
+In addition to supporting the linking of regular expressions with arbitrary
+program logic, the Perl programming language permits the embedding of code into
+regular expressions. Perl embeddings do not translate into the embedding of
+code into deterministic state machines. Perl regular expressions are in fact
+not fully compiled to deterministic machines when embedded code is involved.
+They are instead interpreted and involve backtracking. This is shown by the
+following Perl program. When it is fed the input .verb|abcd| the interpreter
+attempts to match the first alternative, printing .verb|a1 b1|. When this
+possibility fails it backtracks and tries the second possibility, printing
+.verb|a2 b2|, at which point it succeeds.
+
+.code
+print "YES\n" if ( <STDIN> =~
+ /( a (?{ print "a1 "; }) b (?{ print "b1 "; }) cX ) |
+ ( a (?{ print "a2 "; }) b (?{ print "b2 "; }) cd )/x )
+.end code
+
+In Ragel there is no regular expression interpreter. Aside from the scanner
+operator, all Ragel expressions are made into deterministic machines and the
+run time simply moves from state to state as it consumes input. An equivalent
+parser expressed in Ragel would attempt both of the alternatives concurrently,
+printing .verb|a1 a2 b1 b2|.
+
+.section Development Status
+
+Ragel is a relatively new tool and is under continuous development. As a rough
+release guide, minor revision number changes are for implementation
+improvements and feature additions. Major revision number changes are for
+implementation and language changes that do not preserve backwards
+compatibility. Though in the past this has not always held true: changes that
+break code have crept into minor version number changes. Typically, the
+documentation lags behind the development in the interest of documenting only
+the lasting features. The latest changes are always documented in the ChangeLog
+file.
+
+.chapter Constructing State Machines
+
+.section Ragel State Machine Specifications
+
+A Ragel input file consists of a program in the host language that contains embedded machine
+specifications. Ragel normally passes input straight to output. When it sees
+a machine specification it stops to read the Ragel statements and possibly generate
+code in place of the specification.
+Afterwards it continues to pass input through. There
+can be any number of FSM specifications in an input file. A multi-line FSM spec
+starts with .verb|%%{| and ends with .verb|}%%|. A single-line FSM spec starts
+with .verb|%%| and ends at the first newline.
+
+While Ragel is looking for FSM specifications it does basic lexical analysis on
+the surrounding input. It interprets literal strings and comments so a
+.verb|%%| sequence in either of those will not trigger the parsing of an FSM
+specification. Ragel does not pass the input through any preprocessor nor does it
+interpret preprocessor directives itself so includes, defines and ifdef logic
+cannot be used to alter the parse of a Ragel input file. It is therefore not
+possible to use an .verb|#if 0| directive to comment out a machine as is
+commonly done in C code. As an alternative, a machine can be prevented from
+causing any generated output by commenting out write statements.
+
+In Figure .ref{cmd-line-parsing}, a multi-line specification is used to define the
+machine and single line specifications are used to trigger the writing of the machine
+data and execution code.
+
+.figure cmd-line-parsing
+.multicols
+.verbatim
+#include <string.h>
+#include <stdio.h>
+
+%%{
+ machine foo;
+ main :=
+ ( 'foo' | 'bar' )
+ 0 @{ res = 1; };
+}%%
+
+%% write data;
+.end verbatim
+.columnbreak
+.verbatim
+int main( int argc, char **argv )
+{
+ int cs, res = 0;
+ if ( argc > 1 ) {
+ char *p = argv[1];
+ char *pe = p + strlen(p) + 1;
+ %% write init;
+ %% write exec;
+ }
+ printf("result = %i\n", res );
+ return 0;
+}
+.end verbatim
+.end multicols
+.caption Parsing a command line argument.
+.end figure
+
+.subsection Naming Ragel Blocks
+
+.verbatim
+machine fsm_name;
+.end verbatim
+
+The .verb|machine| statement gives the name of the FSM. If present in a
+specification, this statement must appear first. If a machine specification
+does not have a name then Ragel uses the previous specification name. If no
+previous specification name exists then this is an error. Because FSM
+specifications persist in memory, a machine's statements can be spread across
+multiple machine specifications. This allows one to break up a machine across
+several files or draw in statements that are common to multiple machines using
+the .verb|include| statement.
+
+.subsection Machine Definition
+.label{definition}
+
+.verbatim
+<name> = <expression>;
+.end verbatim
+
+The machine definition statement associates an FSM expression with a name. Machine
+expressions assigned to names can later be referenced in other expressions. A
+definition statement on its own does not cause any states to be generated. It is simply a
+description of a machine to be used later. States are generated only when a definition is
+instantiated, which happens when a definition is referenced in an instantiated
+expression.
+
+.subsection Machine Instantiation
+.label{instantiation}
+
+.verbatim
+<name> := <expression>;
+.end verbatim
+
+The machine instantiation statement generates a set of states representing an
+expression. Each instantiation generates a distinct set of states. The starting
+state of the instantiation is written in the data section of the generated code
+using the instantiation name. If a machine named
+.verb|main| is instantiated, its start state is used as the
+specification's start state and is assigned to the .verb|cs| variable by the
+.verb|write init| command. If no .verb|main| machine is given, the start state
+of the last machine instantiation to appear is used as the specification's
+start state.
+
+From outside the execution loop, control may be passed to any machine by
+assigning the entry point to the .verb|cs| variable. From inside the execution
+loop, control may be passed to any machine instantiation using .verb|fcall|,
+.verb|fgoto| or .verb|fnext| statements.
+
+.subsection Including Ragel Code
+
+.verbatim
+include FsmName "inputfile.rl";
+.end verbatim
+
+The .verb|include| statement can be used to draw in the statements of another FSM
+specification. Both the name and input file are optional, however at least one
+must be given. Without an FSM name, the given input file is searched for an FSM
+of the same name as the current specification. Without an input file the
+current file is searched for a machine of the given name. If both are present,
+the given input file is searched for a machine of the given name.
+
+Ragel searches for included files from the location of the current file.
+Additional directories can be added to the search path using the .verb|-I|
+option.
+
+.subsection Importing Definitions
+.label{import}
+
+.verbatim
+import "inputfile.h";
+.end verbatim
+
+The .verb|import| statement scrapes a file for sequences of tokens that match
+the following forms. Ragel treats these forms as state machine definitions.
+
+.list
+.li .verb|name '=' number|
+.li .verb|name '=' lit_string|
+.li .verb|'define' name number|
+.li .verb|'define' name lit_string|
+.end list
+
+If the input file is a Ragel program then tokens inside any Ragel
+specifications are ignored. See Section .ref{export} for a description of
+exporting machine definitions.
+
+Ragel searches for imported files from the location of the current file.
+Additional directories can be added to the search path using the .verb|-I|
+option.
+
+.section Lexical Analysis of a Ragel Block
+.label{lexing}
+
+Within a machine specification the following lexical rules apply to the input.
+
+.itemize
+
+.item The .verb|#| symbol begins a comment that terminates at the next newline.
+
+.item The symbols .verb|""|, .verb|''|, .verb|//|, .verb|[]| behave as the
+delimiters of literal strings. Within them, the following escape sequences
+are interpreted:
+
+.verb| \0 \a \b \t \n \v \f \r|
+
+A backslash at the end of a line joins the following line onto the current. A
+backslash preceding any other character removes special meaning. This applies
+to terminating characters and to special characters in regular expression
+literals. As an exception, regular expression literals do not support escape
+sequences as the operands of a range within a list. See the bullet on regular
+expressions in Section .ref{basic}.
+
+.item The symbols .verb|{}| delimit a block of host language code that will be
+embedded into the machine as an action. Within the block of host language
+code, basic lexical analysis of comments and strings is done in order to
+correctly find the closing brace of the block. With the exception of FSM
+commands embedded in code blocks, the entire block is preserved as is for
+identical reproduction in the output code.
+
+.item The pattern .verb|[+-]?[0-9]+| denotes an integer in decimal format.
+Integers used for specifying machines may be negative only if the alphabet type
+is signed. Integers used for specifying priorities may be positive or negative.
+
+.item The pattern .verb|0x[0-9A-Fa-f]+| denotes an integer in hexadecimal
+format.
+
+.item The keywords are .verb|access|, .verb|action|, .verb|alphtype|,
+.verb|getkey|, .verb|write|, .verb|machine| and .verb|include|.
+
+.item The pattern .verb|[a-zA-Z_][a-zA-Z_0-9]*| denotes an identifier.
+
+.comment
+.item The allowable symbols are:
+
+.verb/ ( ) ! ^ * ? + : -> - | & . , := = ; > @ $ % /\\
+.verb| >/ $/ %/ </ @/ <>/ >! $! %! <! @! <>!|\\
+.verb| >^ $^ %^ <^ @^ <>^ >~ $~ %~ <~ @~ <>~|\\
+.verb| >* $* %* <* @* <>*|
+.end comment
+
+.item Any amount of whitespace may separate tokens.
+
+.end itemize
+
+.comment
+.section Parse of an FSM Specification
+
+The following statements are possible within an FSM specification. The
+requirements for trailing semicolons loosely follow that of C.
+A block
+specifying code does not require a trailing semicolon. An expression
+statement does require a trailing semicolon.
+.end comment
+
+.section Basic Machines
+.label{basic}
+
+The basic machines are the base operands of regular language expressions. They
+are the smallest unit to which machine construction and manipulation operators
+can be applied.
+
+.itemize
+
+.item .verb|'hello'| -- Concatenation Literal. Produces a machine that matches
+the sequence of characters in the quoted string. If there are 5 characters
+there will be 6 states chained together with the characters in the string. See
+Section .ref{lexing} for information on valid escape sequences.
+
+.comment
+% GENERATE: bmconcat
+% OPT: -p
+% %%{
+% machine bmconcat;
+.verbatim
+main := 'hello';
+.end verbatim
+% }%%
+% END GENERATE
+.end comment
+
+.graphic bmconcat
+
+It is possible
+to make a concatenation literal case-insensitive by appending an .verb|i| to
+the string, for example .verb|'cmd'i|.
+
+.item .verb|"hello"| -- Identical to the single quoted version.
+
+.item .verb|[hello]| -- Or Expression. Produces a union of characters. There
+will be two states with a transition for each unique character between the two states.
+The .verb|[]| delimiters behave like the quotes of a literal string. For example,
+.verb|[ \t]| means tab or space. The .verb|or| expression supports character ranges
+with the .verb|-| symbol as a separator. The meaning of the union can be negated
+using an initial .verb|^| character as in standard regular expressions.
+See Section .ref{lexing} for information on valid escape sequences
+in .verb|or| expressions.
+
+.comment
+% GENERATE: bmor
+% OPT: -p
+% %%{
+% machine bmor;
+.verbatim
+main := [hello];
+.end verbatim
+% }%%
+% END GENERATE
+.end comment
+
+.graphic bmor
+
+.item .verb|''|, .verb|""|, and .verb|[]| -- Zero Length Machine. Produces a machine
+that matches the zero length string. Zero length machines have one state that is both
+a start state and a final state.
+
+.comment
+% GENERATE: bmnull
+% OPT: -p
+% %%{
+% machine bmnull;
+.verbatim
+main := '';
+.end verbatim
+% }%%
+% END GENERATE
+.end comment
+
+.graphic bmnull
+
+% FIXME: More on the range of values here.
+.item .verb|42| -- Numerical Literal. Produces a two state machine with one
+transition on the given number. The number may be in decimal or hexadecimal
+format and should be in the range allowed by the alphabet type. The minimum and
+maximum values permitted are defined by the host machine that Ragel is compiled
+on. For example, numbers in a .verb|short| alphabet on an i386 machine should
+be in the range .verb|-32768| to .verb|32767|.
+
+.comment
+% GENERATE: bmnum
+% %%{
+% machine bmnum;
+.verbatim
+main := 42;
+.end verbatim
+% }%%
+% END GENERATE
+.end comment
+
+.graphic bmnum
+
+.item .verb|/simple_regex/| -- Regular Expression. Regular expressions are
+parsed as a series of expressions that are concatenated together. Each
+concatenated expression
+may be a literal character, the ``any'' character specified by the .verb|.|
+symbol, or a union of characters specified by the .verb|[]| delimiters. If the
+first character of a union is .verb|^| then it matches any character not in the
+list. Within a union, a range of characters can be given by separating the first
+and last characters of the range with the .verb|-| symbol. Each
+concatenated machine may have repetition specified by following it with the
+.verb|*| symbol. The standard escape sequences described in Section
+.ref{lexing} are supported everywhere in regular expressions except as the
+operands of a range within a list. This notation also supports the .verb|i|
+trailing option. Use it to produce case-insensitive machines, as in .verb|/GET/i|.
+
+Ragel does not support very complex regular expressions because the desired
+results can always be achieved using the more general machine construction
+operators listed in Section .ref{machconst}. The following diagram shows the
+result of compiling .verb|/ab*[c-z].*[123]/|. .verb|DEF| represents the default
+transition, which is taken if no other transition can be taken.
+
+.comment
+% GENERATE: bmregex
+% OPT: -p
+% %%{
+% machine bmregex;
+.verbatim
+main := /ab*[c-z].*[123]/;
+.end verbatim
+% }%%
+% END GENERATE
+.end comment
+
+.graphic bmregex
+
+.item .verb|'a' .. 'z'| -- Range. Produces a machine that matches any
+characters in the specified range. Allowable upper and lower bounds of the
+range are concatenation literals of length one and numerical literals. For
+example, .verb|0x10..0x20|, .verb|0..63|, and .verb|'a'..'z'| are valid ranges.
+The bounds should be in the range allowed by the alphabet type.
+
+.comment
+% GENERATE: bmrange
+% OPT: -p
+% %%{
+% machine bmrange;
+.verbatim
+main := 'a' .. 'z';
+.end verbatim
+% }%%
+% END GENERATE
+.end comment
+
+.graphic bmrange
+
+.item .verb|variable_name| -- Lookup the machine definition assigned to the
+variable name given and use an instance of it. See Section .ref{definition} for
+an important note on what it means to reference a variable name.
+
+.item .verb|builtin_machine| -- There are several built-in machines available
+for use. They are all two state machines for the purpose of matching common
+classes of characters. They are:
+
+.itemize
+
+.item .verb|any | -- Any character in the alphabet.
+
+.item .verb|ascii | -- Ascii characters. .verb|0..127|
+
+.item .verb|extend| -- Ascii extended characters. This is the range
+.verb|-128..127| for signed alphabets and the range .verb|0..255| for unsigned
+alphabets.
+
+.item .verb|alpha | -- Alphabetic characters. .verb|[A-Za-z]|
+
+.item .verb|digit | -- Digits. .verb|[0-9]|
+
+.item .verb|alnum | -- Alpha numerics. .verb|[0-9A-Za-z]|
+
+.item .verb|lower | -- Lowercase characters. .verb|[a-z]|
+
+.item .verb|upper | -- Uppercase characters. .verb|[A-Z]|
+
+.item .verb|xdigit| -- Hexadecimal digits. .verb|[0-9A-Fa-f]|
+
+.item .verb|cntrl | -- Control characters. .verb|0..31|
+
+.item .verb|graph | -- Graphical characters. .verb|[!-~]|
+
+.item .verb|print | -- Printable characters. .verb|[ -~]|
+
+.item .verb|punct | -- Punctuation. Graphical characters that are not alphanumerics.
+.verb|[!-/:-@[-`{-~]|
+
+.item .verb|space | -- Whitespace. .verb|[\t\v\f\n\r ]|
+
+.item .verb|zlen | -- Zero length string. .verb|""|
+
+.item .verb|empty | -- Empty set. Matches nothing. .verb|^any|
+
+.end itemize
+.end itemize
+
+.section Operator Precedence
+The following table shows operator precedence from lowest to highest. Operators
+in the same precedence group are evaluated from left to right.
+
+.tabular
+.row 1&.verb| , |&Join
+.row 2&.verb/ | & - --/&Union, Intersection and Subtraction
+.row 3&.verb| . <: :> :>> |&Concatenation
+.row 4&.verb| : |&Label
+.row 5&.verb| -> |&Epsilon Transition
+.row 6&.verb| > @ $ % |&Transition Actions and Priorities
+.row 6&.verb| >/ $/ %/ </ @/ <>/ |&EOF Actions
+.row 6&.verb| >! $! %! <! @! <>! |&Global Error Actions
+.row 6&.verb| >^ $^ %^ <^ @^ <>^ |&Local Error Actions
+.row 6&.verb| >~ $~ %~ <~ @~ <>~ |&To-State Actions
+.row 6&.verb| >* $* %* <* @* <>* |&From-State Actions
+.row 7&.verb| * ** ? + {n} {,n} {n,} {n,m} |&Repetition
+.row 8&.verb| ! ^ |&Negation and Character-Level Negation
+.row 9&.verb| ( <expr> ) |&Grouping
+.end tabular
+
+.section Regular Language Operators
+.label{machconst}
+
+When using Ragel it is helpful to have a sense of how it constructs machines.
+The determinization process can produce results that seem unusual to someone
+not familiar with the NFA to DFA conversion algorithm. In this section we
+describe Ragel's state machine operators. Though the operators are defined
+using epsilon transitions, it should be noted that this is for discussion only.
+The epsilon transitions described in this section do not persist, but are
+immediately removed by the determinization process which is executed at every
+operation. Ragel does not make use of any nondeterministic intermediate state
+machines.
+
+To create an epsilon transition between two states .verb|x| and .verb|y| is to
+copy all of the properties of .verb|y| into .verb|x|. This involves drawing in
+all of .verb|y|'s to-state actions, EOF actions, etc., in addition to its
+transitions. If .verb|x| and .verb|y| both have a transition out on the same
+character, then the transitions must be combined. During transition
+combination a new transition is made that goes to a new state that is the
+combination of both target states. The new combination state is created using
+the same epsilon transition method. The new state has an epsilon transition
+drawn to all the states that compose it. Since the creation of new epsilon
+transitions may be triggered every time an epsilon transition is drawn, the
+process of drawing epsilon transitions is repeated until there are no more
+epsilon transitions to be made.
+
+A very common error that is made when using Ragel is to make machines that do
+too much. That is, to create machines that have unintentional
+nondeterministic properties. This usually results from being unaware of the common strings
+between machines that are combined together using the regular language
+operators. This can involve never leaving a machine, causing its actions to be
+propagated through all the following states. Or it can involve an alternation
+where both branches are unintentionally taken simultaneously.
+
+This problem forces one to think hard about the language that needs to be
+matched. To guard against this kind of problem one must ensure that the machine
+specification is divided up using boundaries that do not allow ambiguities from
+one portion of the machine to the next. See Chapter
+.ref{controlling-nondeterminism} for more on this problem and how to solve it.
+
+The Graphviz tool is an immense help when debugging improperly compiled
+machines or otherwise learning how to use Ragel. Graphviz Dot files can be
+generated from Ragel programs using the .verb|-V| option. See Section
+.ref{visualization} for more information.
+
+
+.subsection Union
+
+.verb/expr | expr/
+
+The union operation produces a machine that matches any string in machine one
+or machine two. The operation first creates a new start state. Epsilon
+transitions are drawn from the new start state to the start states of both
+input machines. The resulting machine has a final state set equivalent to the
+union of the final state sets of both input machines. In this operation, there
+is the opportunity for nondeterminism among both branches. If there are
+strings, or prefixes of strings that are matched by both machines then the new
+machine will follow both parts of the alternation at once. The union operation is
+shown below.
+
+.graphic opor 1.0
+
+The following example demonstrates the union of three machines representing
+common tokens.
+
+% GENERATE: exor
+% OPT: -p
+% %%{
+% machine exor;
+.code
+# Hex digits, decimal digits, or identifiers
+main := '0x' xdigit+ | digit+ | alpha alnum*;
+.end code
+% }%%
+% END GENERATE
+
+.graphic exor
+
+.subsection Intersection
+
+.verb|expr & expr|
+
+Intersection produces a machine that matches any
+string that is in both machine one and machine two. To achieve intersection, a
+union is performed on the two machines. After the result has been made
+deterministic, any final state that is not a combination of final states from
+both machines has its final state status revoked. To complete the operation,
+paths that do not lead to a final state are pruned from the machine. Therefore,
+if there are any such paths in either of the expressions they will be removed
+by the intersection operator. Intersection can be used to require that two
+independent patterns be simultaneously satisfied as in the following example.
+
+% GENERATE: exinter
+% OPT: -p
+% %%{
+% machine exinter;
+.code
+# Match lines four characters wide that contain
+# words separated by whitespace.
+main :=
+ /[^\n][^\n][^\n][^\n]\n/* &
+ (/[a-z][a-z]*/ | [ \n])**;
+.end code
+% }%%
+% END GENERATE
+
+.graphic exinter
+
+.subsection Difference
+
+.verb|expr - expr|
+
+The difference operation produces a machine that matches
+strings that are in machine one but are not in machine two. To achieve subtraction,
+a union is performed on the two machines. After the result has been made
+deterministic, any final state that came from machine two or is a combination
+of states involving a final state from machine two has its final state status
+revoked. As with intersection, the operation is completed by pruning any path
+that does not lead to a final state. The following example demonstrates the
+use of subtraction to exclude specific cases from a set.
+
+% GENERATE: exsubtr
+% OPT: -p
+% %%{
+% machine exsubtr;
+.code
+# Subtract keywords from identifiers.
+main := /[a-z][a-z]*/ - ( 'for' | 'int' );
+.end code
+% }%%
+% END GENERATE
+
+.graphic exsubtr
+
+.subsection Strong Difference
+.label{strong_difference}
+
+.verb|expr -- expr|
+
+Strong difference produces a machine that matches any string of the first
+machine that does not have any string of the second machine as a substring. In
+the following example, strong subtraction is used to exclude .verb|CRLF| from
+a sequence. In the corresponding visualization, the label .verb|DEF| is short
+for default. The default transition is taken if no other transition can be
+taken.
+
+% GENERATE: exstrongsubtr
+% OPT: -p
+% %%{
+% machine exstrongsubtr;
+.code
+crlf = '\r\n';
+main := [a-z]+ ':' ( any* -- crlf ) crlf;
+.end code
+% }%%
+% END GENERATE
+
+.graphic exstrongsubtr
+
+This operator is equivalent to the following.
+
+.verbatim
+expr - ( any* expr any* )
+.end verbatim
+
+.subsection Concatenation
+
+.verb|expr . expr|
+
+Concatenation produces a machine that matches all the strings in machine one followed by all
+the strings in machine two. Concatenation draws epsilon transitions from the
+final states of the first machine to the start state of the second machine. The
+final states of the first machine lose their final state status, unless the
+start state of the second machine is final as well.
+Concatenation is the default operator. Two machines next to each other with no
+operator between them results in concatenation.
+
+.graphic opconcat 1.0
+
+The opportunity for nondeterministic behaviour results from the possibility of
+the final states of the first machine accepting a string that is also accepted
+by the start state of the second machine.
+The most common scenario in which this happens is the
+concatenation of a machine that repeats some pattern with a machine that gives
+a terminating string, but the repetition machine does not exclude the
+terminating string. The example in Section .ref{strong_difference}
+guards against this. Another example is the expression .verb|("'" any* "'")|.
+When executed the thread of control will
+never leave the .verb|any*| machine. This is a problem especially if actions
+are embedded to process the characters of the .verb|any*| component.
+
+In the following example, the first machine is always active due to the
+nondeterministic nature of concatenation. This particular nondeterminism is intended
+however because we wish to permit EOF strings before the end of the input.
+
+% GENERATE: exconcat
+% OPT: -p
+% %%{
+% machine exconcat;
+.code
+# Require an eof marker on the last line.
+main := /[^\n]*\n/* . 'EOF\n';
+.end code
+% }%%
+% END GENERATE
+
+.graphic exconcat
+
+There is a language
+ambiguity involving concatenation and subtraction. Because concatenation is the
+default operator for two
+adjacent machines there is an ambiguity between subtraction of
+a positive numerical literal and concatenation of a negative numerical literal.
+For example, .verb|(x-7)| could be interpreted as .verb|(x . -7)| or
+.verb|(x - 7)|. In the Ragel language, the subtraction operator always takes precedence
+over concatenation of a negative literal. We adhere to the rule that the default
+concatenation operator takes effect only when there are no other operators between
+two machines. Beware of writing machines such as .verb|(any -1)| when what is
+desired is a concatenation of .verb|any| and .verb|-1|. Instead write
+.verb|(any . -1)| or .verb|(any (-1))|. If in doubt of the meaning of your program do not
+rely on the default concatenation operator; always use the .verb|.| symbol.
+
+
+.subsection Kleene Star
+
+.verb|expr*|
+
+The machine resulting from the Kleene Star operator will match zero or more
+repetitions of the machine it is applied to.
+It creates a new start state and an additional final
+state. Epsilon transitions are drawn between the new start state and the old start
+state, between the new start state and the new final state, and
+between the final states of the machine and the new start state. After the
+machine is made deterministic the effect is of the final states getting all the
+transitions of the start state.
+
+.graphic opstar 1.0
+
+The possibility for nondeterministic behaviour arises if the final states have
+transitions on any of the same characters as the start state. This is common
+when applying kleene star to an alternation of tokens. Like the other problems
+arising from nondeterministic behavior, this is discussed in more detail in Chapter
+.ref{controlling-nondeterminism}. This particular problem can also be solved
+by using the longest-match construction discussed in Section
+.ref{generating-scanners} on scanners.
+
+In this
+example, there is no nondeterminism introduced by the exterior kleene star due to
+the newline at the end of the regular expression. Without the newline the
+exterior kleene star would be redundant and there would be ambiguity between
+repeating the inner range of the regular expression and the entire regular
+expression. Though it would not cause a problem in this case, unnecessary
+nondeterminism in the kleene star operator often causes undesired results for
+new Ragel users and must be guarded against.
+
+% GENERATE: exstar
+% OPT: -p
+% %%{
+% machine exstar;
+.code
+# Match any number of lines with only lowercase letters.
+main := /[a-z]*\n/*;
+.end code
+% }%%
+% END GENERATE
+
+.graphic exstar
+
+.subsection One Or More Repetition
+
+.verb|expr+|
+
+This operator produces the concatenation of the machine with the kleene star of
+itself. The result will match one or more repetitions of the machine. The plus
+operator is equivalent to .verb|(expr . expr*)|.
+
+% GENERATE: explus
+% OPT: -p
+% %%{
+% machine explus;
+.code
+# Match alpha-numeric words.
+main := alnum+;
+.end code
+% }%%
+% END GENERATE
+
+.graphic explus
+
+.subsection Optional
+
+.verb|expr?|
+
+The .em{optional} operator produces a machine that accepts the machine
+given or the zero length string. The optional operator is equivalent to
+.verb/(expr | '' )/. In the following example the optional operator is used to
+possibly extend a token.
+
+% GENERATE: exoption
+% OPT: -p
+% %%{
+% machine exoption;
+.code
+# Match integers or floats.
+main := digit+ ('.' digit+)?;
+.end code
+% }%%
+% END GENERATE
+
+.graphic exoption
+
+.subsection Repetition
+
+.list
+.li .verb|expr {n}| -- Exactly N copies of expr.
+.li .verb|expr {,n}| -- Zero to N copies of expr.
+.li .verb|expr {n,}| -- N or more copies of expr.
+.li .verb|expr {n,m}| -- N to M copies of expr.
+.end list
+
+.subsection Negation
+
+.verb|!expr|
+
+Negation produces a machine that matches any string not matched by the given
+machine. Negation is equivalent to .verb|(any* - expr)|.
+
+% GENERATE: exnegate
+% OPT: -p
+% %%{
+% machine exnegate;
+.code
+# Accept anything but a string beginning with a digit.
+main := ! ( digit any* );
+.end code
+% }%%
+% END GENERATE
+
+.graphic exnegate
+
+.subsection Character-Level Negation
+
+.verb|^expr|
+
+Character-level negation produces a machine that matches any single character
+not matched by the given machine. Character-level negation is equivalent to
+.verb|(any - expr)|. It must be applied only to machines that match strings of
+length one.
+
+.section State Machine Minimization
+
+State machine minimization is the process of finding the minimal equivalent FSM accepting
+the language. Minimization reduces the number of states in machines
+by merging equivalent states. It does not change the behaviour of the machine
+in any way. It will cause some states to be merged into one because they are
+functionally equivalent. State minimization is on by default. It can be turned
+off with the .verb|-n| option.
+
+The algorithm implemented is similar to Hopcroft's state minimization
+algorithm. Hopcroft's algorithm assumes a finite alphabet that can be listed in
+memory, whereas Ragel supports arbitrary integer alphabets that cannot be
+listed in memory. Though exact analysis is very difficult, Ragel minimization
+runs close to O(n * log(n)) and requires O(n) temporary storage where
+.em{n} is the number of states.
+
+.section Visualization
+.label{visualization}
+
+%In many cases, practical
+%parsing programs will be too large to completely visualize with Graphviz. The
+%proper approach is to reduce the language to the smallest subset possible that
+%still exhibits the characteristics that one wishes to learn about or to fix.
+%This can be done without modifying the source code using the .verb|-M| and
+%.verb|-S| options. If a machine cannot be easily reduced,
+%embeddings of unique actions can be very useful for tracing a
+%particular component of a larger machine specification, since action names are
+%written out on transition labels.
+
+Ragel is able to emit compiled state machines in Graphviz's Dot file format.
+This is done using the .verb|-V| option.
+Graphviz support allows users to perform
+incremental visualization of their parsers. User actions are displayed on
+transition labels of the graph.
+
+If the final graph is too large to be
+meaningful, or even drawn, the user is able to inspect portions of the parser
+by naming particular regular expression definitions with the .verb|-S| and
+.verb|-M| options to the .verb|ragel| program. Use of Graphviz greatly
+improves the Ragel programming experience. It allows users to learn Ragel by
+experimentation and also to track down bugs caused by unintended
+nondeterminism.
+
+Ragel has another option to help debugging. The .verb|-x| option causes Ragel
+to emit the compiled machine in an XML format.
+
+.chapter User Actions
+
+Ragel permits the user to embed actions into the transitions of a regular
+expression's corresponding state machine. These actions are executed when the
+generated code moves over a transition. Like the regular expression operators,
+the action embedding operators are fully compositional. They take a state
+machine and an action as input, embed the action and yield a new state machine
+that can be used in the construction of other machines. Due to the
+compositional nature of embeddings, the user has complete freedom in the
+placement of actions.
+
+A machine's transitions are categorized into four classes. The action embedding
+operators access the transitions defined by these classes. The .em{entering
+transition} operator .verb|>| isolates the start state, then embeds an action
+into all transitions leaving it. The .em{finishing transition} operator
+.verb|@| embeds an action into all transitions going into a final state. The
+.em{all transition} operator .verb|$| embeds an action into all transitions of
+an expression. The .em{leaving transition} operator .verb|%| provides access
+to the yet-unmade transitions moving out of the machine via the final states.
+
+.section Embedding Actions
+
+.verbatim
+action ActionName {
+ /* Code an action here. */
+ count += 1;
+}
+.end verbatim
+
+The action statement defines a block of code that can be embedded into an FSM.
+Action names can be referenced by the action embedding operators in
+expressions. Though actions need not be named in this way (literal blocks
+of code can be embedded directly when building machines), defining reusable
+blocks of code whenever possible is good practice because it potentially increases the
+degree to which the machine can be minimized.
+
+Within an action some Ragel expressions and statements are parsed and
+translated. These allow the user to interact with the machine from action code.
+See Section .ref{vals} for a complete list of statements and values available
+in code blocks.
+
+.subsection Entering Action
+
+.verb|expr > action|
+
+The entering action operator embeds an action into all transitions
+that enter into the machine from the start state. If the start state is final,
+then the action is also embedded into the start state as a leaving action. This
+means that if a machine accepts the zero-length string and control passes
+through the start state then the entering action is executed. Note
+that this can happen on both a following character and on the EOF event.
+
+In some machines the start state has transitions coming in from within the
+machine. In these cases the start state is first isolated from the rest of the
+machine, ensuring that the entering actions are executed once only.
+
+% GENERATE: exstact
+% OPT: -p
+% %%{
+% machine exstact;
+.code
+# Execute A at the beginning of a string of alpha.
+action A {}
+main := ( lower* >A ) . ' ';
+.end code
+% }%%
+% END GENERATE
+
+.graphic exstact
+
+.subsection Finishing Action
+
+.verb|expr @ action|
+
+The finishing action operator embeds an action into any transitions that move
+the machine into a final state. Further input may move the machine out of the
+final state, but keep it in the machine. Therefore finishing actions may be
+executed more than once if a machine has any internal transitions out of a
+final state. In the following example the final state has no transitions out
+and the finishing action is executed only once.
+
+% GENERATE: exdoneact
+% OPT: -p
+% %%{
+% machine exdoneact;
+% action A {}
+.code
+# Execute A when the trailing space is seen.
+main := ( lower* ' ' ) @A;
+.end code
+% }%%
+% END GENERATE
+
+.graphic exdoneact
+
+.subsection All Transition Action
+
+.verb|expr $ action|
+
+The all transition operator embeds an action into all transitions of a machine.
+The action is executed whenever a transition of the machine is taken. In the
+following example, A is executed on every character matched.
+
+% GENERATE: exallact
+% OPT: -p
+% %%{
+% machine exallact;
+% action A {}
+.code
+# Execute A on any characters of the machine.
+main := ( 'm1' | 'm2' ) $A;
+.end code
+% }%%
+% END GENERATE
+
+.graphic exallact
+
+.subsection Leaving Actions
+.label{out-actions}
+
+.verb|expr % action|
+
+The leaving action operator queues an action for embedding into the transitions
+that go out of a machine via a final state. The action is first stored in
+the machine's final states and is later transferred to any transitions that are
+made going out of the machine by a kleene star or concatenation operation.
+
+If a final state of the machine is still final when compilation is complete
+then the leaving action is also embedded as an EOF action. Therefore, leaving
+the machine is defined as either leaving on a character or as state machine
+acceptance.
+
+This operator allows one to associate an action with the termination of a
+sequence, without being concerned about what particular character terminates
+the sequence. In the following example, A is executed when leaving the alpha
+machine on the newline character.
+
+% GENERATE: exoutact1
+% OPT: -p
+% %%{
+% machine exoutact1;
+% action A {}
+.code
+# Match a word followed by a newline. Execute A when
+# finishing the word.
+main := ( lower+ %A ) . '\n';
+.end code
+% }%%
+% END GENERATE
+
+.graphic exoutact1
+
+In the following example, the .verb|term_word| action could be used to register
+the appearance of a word and to clear the buffer that the .verb|lower| action used
+to store the text of it.
+
+% GENERATE: exoutact2
+% OPT: -p
+% %%{
+% machine exoutact2;
+% action lower {}
+% action space {}
+% action term_word {}
+% action newline {}
+.code
+word = ( [a-z] @lower )+ %term_word;
+main := word ( ' ' @space word )* '\n' @newline;
+.end code
+% }%%
+% END GENERATE
+
+.graphic exoutact2
+
+In this final example of the action embedding operators, A is executed upon entering
+the alpha machine, B is executed on all transitions of the
+alpha machine, C is executed when the alpha machine is exited by moving into the
+newline machine and N is executed when the newline machine moves into a final
+state.
+
+% GENERATE: exaction
+% OPT: -p
+% %%{
+% machine exaction;
+% action A {}
+% action B {}
+% action C {}
+% action N {}
+.code
+# Execute A on starting the alpha machine, B on every transition
+# moving through it and C upon finishing. Execute N on the newline.
+main := ( lower* >A $B %C ) . '\n' @N;
+.end code
+% }%%
+% END GENERATE
+
+.graphic exaction
+
+
+.section State Action Embedding Operators
+
+The state embedding operators allow one to embed actions into states. Like the
+transition embedding operators, there are several different classes of states
+that the operators access. The meanings of the symbols are similar to the
+meanings of the symbols used for the transition embedding operators. The design
+of the state selections was driven by a need to cover the states of an
+expression with exactly one error action.
+
+Unlike the transition embedding operators, the state embedding operators are
+also distinguished by the different kinds of events that embedded actions can
+be associated with. Therefore the state embedding operators have two
+components. The first, which is the first one or two characters, specifies the
+class of states that the action will be embedded into. The second component
+specifies the type of event the action will be executed on. The symbols of the
+second component also have equivalent keywords.
+
+.multicols
+The different classes of states are:
+
+.list
+.li .verb|> | -- the start state
+.li .verb|< | -- any state except the start state
+.li .verb|$ | -- all states
+.li .verb|% | -- final states
+.li .verb|@ | -- any state except final states
+.li .verb|<>| -- any except start and final (middle)
+.end list
+
+.columnbreak
+
+The different kinds of embeddings are:
+
+.list
+.li .verb|~| -- to-state actions (.verb|to|)
+.li .verb|*| -- from-state actions (.verb|from|)
+.li .verb|/| -- EOF actions (.verb|eof|)
+.li .verb|!| -- error actions (.verb|err|)
+.li .verb|^| -- local error actions (.verb|lerr|)
+.end list
+
+.end multicols
+
+.subsection To-State and From-State Actions
+
+.subsubsection To-State Actions
+
+.list
+.li .verb|>~action >to(name) >to{...} | -- the start state
+.li .verb|<~action <to(name) <to{...} | -- any state except the start state
+.li .verb|$~action $to(name) $to{...} | -- all states
+.li .verb|%~action %to(name) %to{...} | -- final states
+.li .verb|@~action @to(name) @to{...} | -- any state except final states
+.li .verb|<>~action <>to(name) <>to{...}| -- any except start and final (middle)
+.end list
+
+
+To-state actions are executed whenever the state machine moves into the
+specified state, either by a natural movement over a transition or by an
+action-based transfer of control such as .verb|fgoto|. They are executed after the
+in-transition's actions but before the current character is advanced and
+tested against the end of the input block. To-state embeddings stay with the
+state. They are irrespective of the state's current set of transitions and any
+future transitions that may be added in or out of the state.
+
+Note that the setting of the current state variable .verb|cs| outside of the
+execute code is not considered by Ragel as moving into a state and consequently
+the to-state actions of the new current state are not executed. This includes
+the initialization of the current state when the machine begins. This is
+because the entry point into the machine execution code is after the execution
+of to-state actions.
+
+.subsubsection From-State Actions
+
+.list
+.li .verb|>*action >from(name) >from{...} | -- the start state
+.li .verb|<*action <from(name) <from{...} | -- any state except the start state
+.li .verb|$*action $from(name) $from{...} | -- all states
+.li .verb|%*action %from(name) %from{...} | -- final states
+.li .verb|@*action @from(name) @from{...} | -- any state except final states
+.li .verb|<>*action <>from(name) <>from{...}| -- any except start and final (middle)
+.end list
+
+From-state actions are executed whenever the state machine takes a transition from a
+state, either to itself or to some other state. These actions are executed
+immediately after the current character is tested against the input block end
+marker and before the transition to take is sought based on the current
+character. From-state actions are therefore executed even if a transition
+cannot be found and the machine moves into the error state. Like to-state
+embeddings, from-state embeddings stay with the state.
+
+.subsection EOF Actions
+
+.list
+.li .verb|>/action >eof(name) >eof{...} | -- the start state
+.li .verb|</action <eof(name) <eof{...} | -- any state except the start state
+.li .verb|$/action $eof(name) $eof{...} | -- all states
+.li .verb|%/action %eof(name) %eof{...} | -- final states
+.li .verb|@/action @eof(name) @eof{...} | -- any state except final states
+.li .verb|<>/action <>eof(name) <>eof{...}| -- any except start and final (middle)
+.end list
+
+The EOF action embedding operators enable the user to embed actions that are
+executed at the end of the input stream. EOF actions are stored in states and
+generated in the .verb|write exec| block. They are run when .verb|p == pe == eof|
+as the execute block is finishing. EOF actions are free to adjust .verb|p| and
+jump to another part of the machine to restart execution.
+
+.subsection Handling Errors
+
+In many applications it is useful to be able to react to parsing errors. The
+user may wish to print an error message that depends on the context. It
+may also be desirable to consume input in an attempt to return the input stream
+to some known state and resume parsing. To support error handling and recovery,
+Ragel provides error action embedding operators. There are two kinds of error
+actions: global error actions and local error actions.
+Error actions can be used to simply report errors, or by jumping to a machine
+instantiation that consumes input, can attempt to recover from errors.
+
+.subsubsection Global Error Actions
+
+.list
+.li .verb|>!action >err(name) >err{...} | -- the start state
+.li .verb|<!action <err(name) <err{...} | -- any state except the start state
+.li .verb|$!action $err(name) $err{...} | -- all states
+.li .verb|%!action %err(name) %err{...} | -- final states
+.li .verb|@!action @err(name) @err{...} | -- any state except final states
+.li .verb|<>!action <>err(name) <>err{...}| -- any except start and final (middle)
+.end list
+
+Global error actions are stored in the states they are embedded into until
+compilation is complete. They are then transferred to the transitions that move
+into the error state. These transitions are taken on all input characters that
+are not already covered by the state's transitions. If a state with an error
+action is not final when compilation is complete, then the action is also
+embedded as an EOF action.
+
+Error actions can be used to recover from errors by jumping back into the
+machine with .verb|fgoto| and optionally altering .verb|p|.
+
+.subsubsection Local Error Actions
+
+.list
+.li .verb|>^action >lerr(name) >lerr{...} | -- the start state
+.li .verb|<^action <lerr(name) <lerr{...} | -- any state except the start state
+.li .verb|$^action $lerr(name) $lerr{...} | -- all states
+.li .verb|%^action %lerr(name) %lerr{...} | -- final states
+.li .verb|@^action @lerr(name) @lerr{...} | -- any state except final states
+.li .verb|<>^action <>lerr(name) <>lerr{...}| -- any except start and final (middle)
+.end list
+
+Like global error actions, local error actions are also stored in the states
+they are embedded into until a transfer point. The transfer point is different
+however. Each local error action embedding is associated with a name. When a
+machine definition has been fully constructed, all local error action
+embeddings associated with the same name as the machine definition are
+transferred to the error transitions. At this time they are also embedded as
+EOF actions in the case of non-final states.
+
+Local error actions can be used to specify an action to take when a particular
+section of a larger state machine fails to match. A particular machine
+definition's ``thread'' may die and the local error actions executed, however
+the machine as a whole may continue to match input.
+
+There are two forms of local error action embeddings. In the first form the
+name defaults to the current machine. In the second form the machine name can
+be specified. This is useful when it is more convenient to specify the local
+error action in a sub-definition that is used to construct the machine
+definition that the local error action is associated with. To embed local
+error actions and
+explicitly state the machine definition on which the transfer is to happen use
+.verb|(name, action)| as the action.
+
+.subsubsection Example
+
+The following example uses error actions to report an error and jump to a
+machine that consumes the remainder of the line when parsing fails. After
+consuming the line, the error recovery machine returns to the main loop.
+
+% GENERATE: erract
+% %%{
+% machine erract;
+% ws = ' ';
+% address = 'foo AT bar..com';
+% date = 'Monday May 12';
+.code
+action cmd_err {
+ printf( "command error\n" );
+ fhold; fgoto line;
+}
+action from_err {
+ printf( "from error\n" );
+ fhold; fgoto line;
+}
+action to_err {
+ printf( "to error\n" );
+ fhold; fgoto line;
+}
+
+line := [^\n]* '\n' @{ fgoto main; };
+
+main := (
+ (
+ 'from' @err(cmd_err)
+ ( ws+ address ws+ date '\n' ) $err(from_err) |
+ 'to' @err(cmd_err)
+ ( ws+ address '\n' ) $err(to_err)
+ )
+)*;
+.end code
+% }%%
+% %% write data;
+% void f()
+% {
+% %% write init;
+% %% write exec;
+% }
+% END GENERATE
+
+
+
+.section Action Ordering and Duplicates
+
+When combining expressions that have embedded actions it is often the case that
+a number of actions must be executed on a single input character. For example,
+following a concatenation the leaving action of the left expression and the
+entering action of the right expression will be embedded into one transition.
+This requires a method of ordering actions that is intuitive and
+predictable for the user, and repeatable for the compiler.
+
+We associate with the embedding of each action a unique timestamp that is
+used to order actions that appear together on a single transition in the final
+state machine. To accomplish this we recursively traverse the parse tree of
+regular expressions and assign timestamps to action embeddings. References to
+machine definitions are followed in the traversal. When we visit a
+parse tree node we assign timestamps to all .em{entering} action embeddings,
+recurse on the parse tree, then assign timestamps to the remaining .em{all},
+.em{finishing}, and .em{leaving} embeddings in the order in which they
+appear.
+
+By default Ragel does not permit a single action to appear multiple times in an action
+list. When the final machine has been created, actions that appear more than
+once in a single transition, to-state, from-state or EOF action list have their
+duplicates removed.
+The first appearance of the action is preserved. This is useful in a number of
+scenarios. First, it allows us to union machines with common prefixes without
+worrying about the action embeddings in the prefix being duplicated. Second, it
+prevents leaving actions from being transferred multiple times. This can
+happen when a machine is repeated, then followed with another machine that
+begins with a common character. For example:
+
+.verbatim
+word = [a-z]+ %act;
+main := word ( '\n' word )* '\n\n';
+.end verbatim
+
+Note that Ragel does not compare action bodies to determine if they have
+identical program text. It simply checks for duplicates using each action
+block's unique location in the program.
+
+The removal of duplicates can be turned off using the .verb|-d| option.
+
+.section Values and Statements Available in Code Blocks
+.label{vals}
+
+The following values are available in code blocks:
+
+.itemize
+.item .verb|fpc| -- A pointer to the current character. This is equivalent to
+accessing the .verb|p| variable.
+
+.item .verb|fc| -- The current character. This is equivalent to the expression .verb|(*p)|.
+
+.item .verb|fcurs| -- An integer value representing the current state. This
+value should only be read from. To move to a different place in the machine
+from action code use the .verb|fgoto|, .verb|fnext| or .verb|fcall| statements.
+Outside of the machine execution code the .verb|cs| variable may be modified.
+
+.item .verb|ftargs| -- An integer value representing the target state. This
+value should only be read from. Again, .verb|fgoto|, .verb|fnext| and
+.verb|fcall| can be used to move to a specific entry point.
+
+.item .verb|fentry(<label>)| -- Retrieve an integer value representing the
+entry point .verb|label|. The integer value returned will be a compile time
+constant. This number is suitable for later use in control flow transfer
+statements that take an expression. This value should not be compared against
+the current state because any given label can have multiple states representing
+it. The value returned by .verb|fentry| can be any one of the multiple states that
+it represents.
+.end itemize
+
+The following statements are available in code blocks:
+
+.itemize
+
+.item .verb|fhold;| -- Do not advance over the current character. If processing
+data in multiple buffer blocks, the .verb|fhold| statement should only be used
+once in the set of actions executed on a character. Multiple calls may result
+in backing up over the beginning of the buffer block. The .verb|fhold|
+statement does not imply any transfer of control. It is equivalent to the
+.verb|p--;| statement.
+
+.item .verb|fexec <expr>;| -- Set the next character to process. This can be
+used to backtrack to previous input or advance ahead.
+Unlike .verb|fhold|, which can be used
+anywhere, .verb|fexec| requires the user to ensure that the target of the
+backtrack is in the current buffer block or is known to be somewhere ahead of
+it. The machine will continue iterating forward until .verb|pe| is arrived at,
+.verb|fbreak| is called or the machine moves into the error state. In actions
+embedded into transitions, the .verb|fexec| statement is equivalent to setting
+.verb|p| to one position ahead of the next character to process. If the user
+also modifies .verb|pe|, it is possible to change the buffer block entirely.
+
+.item .verb|fgoto <label>;| -- Jump to an entry point defined by
+.verb|<label>|. The .verb|fgoto| statement immediately transfers control to
+the destination state.
+
+.item .verb|fgoto *<expr>;| -- Jump to an entry point given by .verb|<expr>|.
+The expression must evaluate to an integer value representing a state.
+
+.item .verb|fnext <label>;| -- Set the next state to be the entry point defined
+by .verb|label|. The .verb|fnext| statement does not immediately jump to the
+specified state. Any action code following the statement is executed.
+
+.item .verb|fnext *<expr>;| -- Set the next state to be the entry point given
+by .verb|<expr>|. The expression must evaluate to an integer value representing
+a state.
+
+.item .verb|fcall <label>;| -- Push the target state and jump to the entry
+point defined by .verb|<label>|. The next .verb|fret| will jump to the target
+of the transition on which the call was made. Use of .verb|fcall| requires
+the declaration of a call stack. An array of integers named .verb|stack| and a
+single integer named .verb|top| must be declared. With the .verb|fcall|
+construct, control is immediately transferred to the destination state.
+See section .ref{modularization} for more information.
+
+.item .verb|fcall *<expr>;| -- Push the current state and jump to the entry
+point given by .verb|<expr>|. The expression must evaluate to an integer value
+representing a state.
+
+.item .verb|fret;| -- Return to the target state of the transition on which the
+last .verb|fcall| was made. Use of .verb|fret| requires the declaration of a
+call stack. Control is immediately transferred to the destination state.
+
+.item .verb|fbreak;| -- Advance .verb|p|, save the target state to .verb|cs|
+and immediately break out of the execute loop. This statement is useful
+in conjunction with the .verb|noend| write option. Rather than process input
+until .verb|pe| is arrived at, the fbreak statement
+can be used to stop processing from an action. After an .verb|fbreak|
+statement the .verb|p| variable will point to the next character in the input. The
+current state will be the target of the current transition. Note that .verb|fbreak|
+causes the target state's to-state actions to be skipped.
+
+.end itemize
+
+Once actions with control-flow commands are embedded into a
+machine, the user must exercise caution when using the machine as the operand
+to other machine construction operators. If an action jumps to another state
+then unioning any transition that executes that action with another transition
+that follows some other path will cause that other path to be lost. Using
+commands that manually jump around a machine takes us out of the domain of
+regular languages because transitions that the
+machine construction operators are not aware of are introduced. These
+commands should therefore be used with caution.
+
+
+.chapter Controlling Nondeterminism
+.label{controlling-nondeterminism}
+
+Along with the flexibility of arbitrary action embeddings comes a need to
+control nondeterminism in regular expressions. If a regular expression is
+ambiguous, then sub-components of a parser other than the intended parts may become
+active. This means that actions that are irrelevant to the
+current subset of the parser may be executed, causing problems for the
+programmer.
+
+Tools that are based on regular expression engines and that are used for
+recognition tasks will usually function as intended regardless of the presence
+of ambiguities. It is quite common for users of scripting languages to write
+regular expressions that are heavily ambiguous and it generally does not
+matter. As long as one of the potential matches is recognized, there can be any
+number of other matches present. In some parsing systems the run-time engine
+can employ a strategy for resolving ambiguities, for example always pursuing
+the longest possible match and discarding others.
+
+In Ragel, there is no regular expression run-time engine, just a simple state
+machine execution model. When we begin to embed actions and face the
+possibility of spurious action execution, it becomes clear that controlling
+nondeterminism at the machine construction level is very important. Consider
+the following example.
+
+% GENERATE: lines1
+% OPT: -p
+% %%{
+% machine lines1;
+% action first {}
+% action tail {}
+% word = [a-z]+;
+.code
+ws = [\n\t ];
+line = word $first ( ws word $tail )* '\n';
+lines = line*;
+.end code
+% main := lines;
+% }%%
+% END GENERATE
+
+.graphic lines1 0.53
+
+Since the .verb|ws| expression includes the newline character, we will
+not finish the .verb|line| expression when a newline character is seen. We will
+simultaneously pursue the possibility of matching further words on the same
+line and the possibility of matching a second line. Evidence of this fact is
+in the state tables. On several transitions both the .verb|first| and
+.verb|tail| actions are executed. The solution here is simple: exclude
+the newline character from the .verb|ws| expression.
+
+% GENERATE: lines2
+% OPT: -p
+% %%{
+% machine lines2;
+% action first {}
+% action tail {}
+% word = [a-z]+;
+.code
+ws = [\t ];
+line = word $first ( ws word $tail )* '\n';
+lines = line*;
+.end code
+% main := lines;
+% }%%
+% END GENERATE
+
+.graphic lines2
+
+Solving this kind of problem is straightforward when the ambiguity is created
+by strings that are a single character long. When the ambiguity is created by
+strings that are multiple characters long we have a more difficult problem.
+The following example is an incorrect attempt at a regular expression for C
+language comments.
+
+% GENERATE: comments1
+% OPT: -p
+% %%{
+% machine comments1;
+% action comm {}
+.code
+comment = '/*' ( any @comm )* '*/';
+main := comment ' ';
+.end code
+% }%%
+% END GENERATE
+
+.graphic comments1
+
+Using standard concatenation, we will never leave the .verb|any*| expression.
+We will forever entertain the possibility that a .verb|'*/'| string that we see
+is contained in a longer comment and that, simultaneously, the comment has
+ended. The concatenation of the .verb|comment| machine with .verb|SP| is done
+to show this. When we match space, we are also still matching the comment body.
+
+One way to approach the problem is to exclude the terminating string
+from the .verb|any*| expression using set difference. We must be careful to
+exclude not just the terminating string, but any string that contains it as a
+substring. A verbose, but proper specification of a C comment parser is given
+by the following regular expression.
+
+% GENERATE: comments2
+% OPT: -p
+% %%{
+% machine comments2;
+% action comm {}
+.code
+comment = '/*' ( ( any @comm )* - ( any* '*/' any* ) ) '*/';
+.end code
+% main := comment;
+% }%%
+% END GENERATE
+
+.graphic comments2
+
+Note that Ragel's strong subtraction operator .verb|--| can also be used here.
+In doing this subtraction we have phrased the problem of controlling non-determinism in
+terms of excluding strings common to two expressions that interact when
+combined.
+We can also phrase the problem in terms of the transitions of the state
+machines that implement these expressions. During the concatenation of
+.verb|any*| and .verb|'*/'| we will be making transitions that are composed of
+both the loop of the first expression and the final character of the second.
+At this time we want the transition on the .verb|'/'| character to take precedence
+over and disallow the transition that originated in the .verb|any*| loop.
+
+In another parsing problem, we wish to implement a lightweight tokenizer that we can
+utilize in the composition of a larger machine. For example, some HTTP headers
+have a token stream as a sub-language. The following example is an attempt
+at a regular expression-based tokenizer that does not function correctly due to
+unintended nondeterminism.
+
+% GENERATE: smallscanner
+% OPT: -p
+% %%{
+% machine smallscanner;
+% action start_str {}
+% action on_char {}
+% action finish_str {}
+.code
+header_contents = (
+ lower+ >start_str $on_char %finish_str |
+ ' '
+)*;
+.end code
+% main := header_contents;
+% }%%
+% END GENERATE
+
+.graphic smallscanner
+
+In this case, the problem with using a standard kleene star operation is that
+there is an ambiguity between extending a token and wrapping around the machine
+to begin a new token. Using the standard operator, we get an undesirable
+nondeterministic behaviour. Evidence of this can be seen on the transition out
+of state one to itself. The transition extends the string, and simultaneously,
+finishes the string only to immediately begin a new one. What is required is
+for the
+transitions that represent an extension of a token to take precedence over the
+transitions that represent the beginning of a new token. For this problem
+there is no simple solution that uses standard regular expression operators.
+
+.section Priorities
+
+A priority mechanism was devised and built into the determinization
+process, specifically for the purpose of allowing the user to control
+nondeterminism. Priorities are integer values embedded into transitions. When
+the determinization process is combining transitions that have different
+priorities, the transition with the higher priority is preserved and the
+transition with the lower priority is dropped.
+
+Unfortunately, priorities can have unintended side effects because their
+operation requires that they linger in transitions indefinitely. They must linger
+because the Ragel program cannot know when the user is finished with a priority
+embedding. A solution whereby they are explicitly deleted after use is
+conceivable; however this is not very user-friendly. Priorities were therefore
+made into named entities. Only priorities with the same name are allowed to
+interact. This allows any number of priorities to coexist in one machine for
+the purpose of controlling various different regular expression operations and
+eliminates the need to ever delete them. Such a scheme allows the user to
+choose a unique name, embed two different priority values using that name
+and be confident that the priority embedding will be free of any side effects.
+
+In the first form of priority embedding the name defaults to the name of the machine
+definition that the priority is assigned in. In this sense priorities are by
+default local to the current machine definition or instantiation. Beware of
+using this form in a longest-match machine, since there is only one name for
+the entire set of longest match patterns. In the second form the priority's
+name can be specified, allowing priority interaction across machine definition
+boundaries.
+
+.itemize
+.item .verb|expr > int| -- Sets starting transitions to have priority int.
+.item .verb|expr @ int| -- Sets transitions that go into a final state to have priority int.
+.item .verb|expr $ int| -- Sets all transitions to have priority int.
+.item .verb|expr % int| -- Sets leaving transitions to
+have priority int. When a transition is made going out of the machine (either
+by concatenation or kleene star) its priority is immediately set to the
+leaving priority.
+.end itemize
+
+The second form of priority assignment allows the programmer to specify the name
+to which the priority is assigned.
+
+.itemize
+.item .verb|expr > (name, int)| -- Starting transitions.
+.item .verb|expr @ (name, int)| -- Finishing transitions (into a final state).
+.item .verb|expr $ (name, int)| -- All transitions.
+.item .verb|expr % (name, int)| -- Leaving transitions.
+.end itemize
+
+.section Guarded Operators that Encapsulate Priorities
+
+Priority embeddings are a very expressive mechanism. At the same time they
+can be very confusing for the user. They force the user to imagine
+the transitions inside two interacting expressions and work out the precise
+effects of the operations between them. When we consider
+that this problem is worsened by the
+potential for side effects caused by unintended priority name collisions, we
+see that exposing the user to priorities is undesirable.
+
+Fortunately, in practice the use of priorities has been necessary only in a
+small number of scenarios. This allows us to encapsulate their functionality
+into a small set of operators and fully hide them from the user. This is
+advantageous from a language design point of view because it greatly simplifies
+the design.
+
+Going back to the C comment example, we can now properly specify
+it using a guarded concatenation operator which we call .em{finish-guarded
+concatenation}. From the user's point of view, this operator terminates the
+first machine when the second machine moves into a final state. It chooses a
+unique name and uses it to embed a low priority into all
+transitions of the first machine. A higher priority is then embedded into the
+transitions of the second machine that enter into a final state. The following
+example yields a machine identical to the example in Section
+.ref{controlling-nondeterminism}.
+
+.code
+comment = '/*' ( any @comm )* :>> '*/';
+.end code
+
+.graphic comments2
+
+Another guarded operator is .em{left-guarded concatenation}, given by the
+.verb|<:| compound symbol. This operator places a higher priority on all
+transitions of the first machine. This is useful if one must forcibly separate
+two lists that contain common elements. For example, one may need to tokenize a
+stream, but first consume leading whitespace.
+
+Ragel also includes a .em{longest-match kleene star} operator, given by the
+.verb|**| compound symbol. This
+guarded operator embeds a high
+priority into all transitions of the machine.
+A lower priority is then embedded into the leaving transitions. When the
+kleene star operator makes the epsilon transitions from
+the final states into the new start state, the lower priority will be transferred
+to the epsilon transitions. In cases where following an epsilon transition
+out of a final state conflicts with an existing transition out of a final
+state, the epsilon transition will be dropped.
+
+Other guarded operators are conceivable, such as guards on union that cause one
+alternative to take precedence over another. These may be implemented when it
+is clear they constitute a frequently used operation.
+In the next section we discuss the explicit specification of state machines
+using state charts.
+
+.subsection Entry-Guarded Concatenation
+
+.verb|expr :> expr|
+
+This operator concatenates two machines, but first assigns a low
+priority to all transitions
+of the first machine and a high priority to the starting transitions of the
+second machine. This operator is useful if from the final states of the first
+machine it is possible to accept the characters in the entering transitions of
+the second machine. This operator effectively terminates the first machine
+immediately upon starting the second machine, where otherwise they would be
+pursued concurrently. In the following example, entry-guarded concatenation is
+used to move out of a machine that matches everything at the first sign of an
+end-of-input marker.
+
+% GENERATE: entryguard
+% OPT: -p
+% %%{
+% machine entryguard;
+.code
+# Leave the catch-all machine on the first character of FIN.
+main := any* :> 'FIN';
+.end code
+% }%%
+% END GENERATE
+
+.graphic entryguard
+
+Entry-guarded concatenation is equivalent to the following:
+
+.verbatim
+expr $(unique_name,0) . expr >(unique_name,1)
+.end verbatim
+
+.subsection Finish-Guarded Concatenation
+
+.verb|expr :>> expr|
+
+This operator is
+like the previous operator, except the higher priority is placed on the final
+transitions of the second machine. This is useful if one wishes to entertain
+the possibility of continuing to match the first machine right up until the
+second machine enters a final state. In other words it terminates the first
+machine only when the second accepts. In the following example, finish-guarded
+concatenation causes the move out of the machine that matches everything to be
+delayed until the full end-of-input marker has been matched.
+
+% GENERATE: finguard
+% OPT: -p
+% %%{
+% machine finguard;
+.code
+# Leave the catch-all machine on the last character of FIN.
+main := any* :>> 'FIN';
+.end code
+% }%%
+% END GENERATE
+
+.graphic finguard
+
+Finish-guarded concatenation is equivalent to the following, with one
+exception. If the right machine's start state is final, the higher priority is
+also embedded into it as a leaving priority. This prevents the left machine
+from persisting via the zero-length string.
+
+.verbatim
+expr $(unique_name,0) . expr @(unique_name,1)
+.end verbatim
+
+.subsection Left-Guarded Concatenation
+
+.verb|expr <: expr|
+
+This operator places
+a higher priority on the left expression. It is useful if you want to prefix a
+sequence with another sequence composed of some of the same characters. For
+example, one can consume leading whitespace before tokenizing a sequence of
+whitespace-separated words as in:
+
+% GENERATE: leftguard
+% OPT: -p
+% %%{
+% machine leftguard;
+% action alpha {}
+% action ws {}
+% action start {}
+% action fin {}
+.code
+main := ( ' '* >start %fin ) <: ( ' ' $ws | [a-z] $alpha )*;
+.end code
+% }%%
+% END GENERATE
+
+.graphic leftguard
+
+Left-guarded concatenation is equivalent to the following:
+
+.verbatim
+expr $(unique_name,1) . expr >(unique_name,0)
+.end verbatim
+
+.subsection Longest-Match Kleene Star
+.label{longest_match_kleene_star}
+
+.verb|expr**|
+
+This version of kleene star puts a higher priority on staying in the
+machine versus wrapping around and starting over. The LM kleene star is useful
+when writing simple tokenizers. These machines are built by applying the
+longest-match kleene star to an alternation of token patterns, as in the
+following.
+
+% GENERATE: lmkleene
+% OPT: -p
+% %%{
+% machine exfinpri;
+% action A {}
+% action B {}
+.code
+# Repeat tokens, but make sure to get the longest match.
+main := (
+ lower ( lower | digit )* %A |
+ digit+ %B |
+ ' '
+)**;
+.end code
+% }%%
+% END GENERATE
+
+.graphic lmkleene
+
+If a regular kleene star were used the machine above would not be able to
+distinguish between extending a word and beginning a new one. This operator is
+equivalent to:
+
+.verbatim
+( expr $(unique_name,1) %(unique_name,0) )*
+.end verbatim
+
+When the kleene star is applied, transitions that go out of the machine and
+back into it are made. These are assigned a priority of zero by the leaving
+transition mechanism. This is less than the priority of one assigned to the
+transitions leaving the final states but not leaving the machine. When
+these transitions clash on the same character, the
+transition that stays in the machine takes precedence. The transition
+that wraps around is dropped.
+
+Note that this operator does not build a scanner in the traditional sense
+because there is never any backtracking. To build a scanner with backtracking
+use the Longest-Match machine construction described in Section
+.ref{generating-scanners}.
+
+.chapter Interface to Host Program
+
+The Ragel code generator is very flexible. The generated code has no
+dependencies and can be inserted in any function, perhaps inside a loop if
+desired. The user is responsible for declaring and initializing a number of
+required variables, including the current state and the pointer to the input
+stream. These can live in any scope. Control of the input processing loop is
+also possible: the user may break out of the processing loop and return to it
+at any time.
+
+In the case of the C, D, and Go host languages, Ragel is able to generate very
+fast-running code that implements state machines as directly executable code.
+Since very large files strain the host language compiler, table-based code
+generation is also supported. In the future we hope to provide a partitioned,
+directly executable format that is able to reduce the burden on the host
+compiler by splitting large machines across multiple functions.
+
+In the case of Java and Ruby, table-based code generation is the only code
+style supported. In the future this may be expanded to include other code
+styles.
+
+Ragel can be used to parse input in one block, or it can be used to parse input
+in a sequence of blocks as it arrives from a file or socket. Parsing the input
+in a sequence of blocks brings with it a few responsibilities. If the parser
+utilizes a scanner, care must be taken to not break the input stream anywhere
+but token boundaries. If pointers to the input stream are taken during
+parsing, care must be taken to not use a pointer that has been invalidated by
+movement to a subsequent block. If the current input data pointer is moved
+backwards it must not be moved past the beginning of the current block.
+
+Figure .ref{basic-example} shows a simple Ragel program that does not have any
+actions. The example tests the first argument of the program against a number
+pattern and then prints the machine's acceptance status.
+
+.figure basic-example
+.verbatim
+#include <stdio.h>
+#include <string.h>
+%%{
+ machine foo;
+ write data;
+}%%
+int main( int argc, char **argv )
+{
+ int cs;
+ if ( argc > 1 ) {
+ char *p = argv[1];
+ char *pe = p + strlen( p );
+ %%{
+ main := [0-9]+ ( '.' [0-9]+ )?;
+
+ write init;
+ write exec;
+ }%%
+ }
+ printf("result = %i\n", cs >= foo_first_final );
+ return 0;
+}
+.end verbatim
+.caption A basic Ragel example without any actions.
+.end figure
+
+.section Variables Used by Ragel
+
+There are a number of variables that Ragel expects the user to declare. At a
+very minimum the .verb|cs|, .verb|p| and .verb|pe| variables must be declared.
+In Go, Java and Ruby code the .verb|data| variable must also be declared. If
+EOF actions are used then the .verb|eof| variable is required. If
+stack-based state machine control flow statements are used then the
+.verb|stack| and .verb|top| variables are required. If a scanner is declared
+then the .verb|act|, .verb|ts| and .verb|te| variables must be
+declared.
+
+.itemize
+
+.item .verb|cs| - Current state. This must be an integer and it should persist
+across invocations of the machine when the data is broken into blocks that are
+processed independently. This variable may be modified from outside the
+execution loop, but not from within.
+
+.item .verb|p| - Data pointer. In C/D code this variable is expected to be a
+pointer to the character data to process. It should be initialized to the
+beginning of the data block on every run of the machine. In Go, Java and Ruby it is
+used as an offset to .verb|data| and must be an integer. In this case it should
+be initialized to zero on every run of the machine.
+
+.item .verb|pe| - Data end pointer. This should be initialized to .verb|p| plus
+the data length on every run of the machine. In Go, Java and Ruby code this should
+be initialized to the data length.
+
+.item .verb|eof| - End of file pointer. This should be set to .verb|pe| when
+the buffer block being processed is the last one, otherwise it should be set to
+null. In Go, Java and Ruby code .verb|-1| must be used instead of null. If the EOF
+event can be known only after the final buffer block has been processed, then
+it is possible to set .verb|p = pe = eof| and run the execute block.
+
+.item .verb|data| - This variable is only required in Go, Java and Ruby code. It
+must be an array containing the data to process.
+
+.item .verb|stack| - This must be an array of integers. It is used to store
+integer values representing states. If the stack must resize dynamically the
+Pre-push and Post-Pop statements can be used to do this (Sections
+.ref{prepush} and .ref{postpop}).
+
+.item .verb|top| - This must be an integer value and will be used as an offset
+to .verb|stack|, giving the next available spot on the top of the stack.
+
+.item .verb|act| - This must be an integer value. It is a variable sometimes
+used by scanner code to keep track of the most recent successful pattern match.
+
+.item .verb|ts| - This must be a pointer to character data. In Go, Java and
+Ruby code this must be an integer. See Section .ref{generating-scanners} for
+more information.
+
+.item .verb|te| - Also a pointer to character data.
+
+.end itemize
+
+.section Alphtype Statement
+
+.verbatim
+alphtype unsigned int;
+.end verbatim
+
+The alphtype statement specifies the alphabet data type that the machine
+operates on. During the compilation of the machine, integer literals are
+expected to be in the range of possible values of the alphtype. The default
+is .verb|char| for all languages except Go where the default is .verb|byte|.
+
+.multicols
+C/C++/Objective-C:
+.verbatim
+ char unsigned char
+ short unsigned short
+ int unsigned int
+ long unsigned long
+.end verbatim
+
+Go:
+.verbatim
+ byte
+ int8 uint8
+ int16 uint16
+ int32 uint32
+ int
+.end verbatim
+
+Ruby:
+.verbatim
+ char
+ int
+.end verbatim
+
+.columnbreak
+
+Java:
+.verbatim
+ char
+ byte
+ short
+ int
+.end verbatim
+
+D:
+.verbatim
+ char
+ byte ubyte
+ short ushort
+ wchar
+ int uint
+ dchar
+.end verbatim
+
+.end multicols
+
+.section Getkey Statement
+
+.verbatim
+getkey fpc->id;
+.end verbatim
+
+This statement specifies to Ragel how to retrieve the current character from
+the pointer to the current element (.verb|p|). Any expression that returns
+a value of the alphabet type
+may be used. The getkey statement may be used for looking into element
+structures or for translating the character to process. The getkey expression
+defaults to .verb|(*p)|. In goto-driven machines the getkey expression may be
+evaluated more than once per element processed, therefore it should not incur a
+large cost nor preclude optimization.
+
+.section Access Statement
+
+.verbatim
+access fsm->;
+.end verbatim
+
+The access statement specifies how the generated code should
+access the machine data that is persistent across processing buffer blocks.
+This applies to all variables except .verb|p|, .verb|pe| and .verb|eof|. This includes
+.verb|cs|, .verb|top|, .verb|stack|, .verb|ts|, .verb|te| and .verb|act|.
+The access statement is useful if a machine is to be encapsulated inside a
+structure in C code. It can be used to give the name of
+a pointer to the structure.
+
+.section Variable Statement
+
+.verbatim
+variable p fsm->p;
+.end verbatim
+
+The variable statement specifies how to access a specific
+variable. All of the variables that are declared by the user and
+used by Ragel can be changed. This includes .verb|p|, .verb|pe|, .verb|eof|, .verb|cs|,
+.verb|top|, .verb|stack|, .verb|ts|, .verb|te| and .verb|act|.
+In Go, Ruby and Java code generation the .verb|data| variable can also be changed.
+
+.section Pre-Push Statement
+.label{prepush}
+
+.verbatim
+prepush {
+ /* stack growing code */
+}
+.end verbatim
+
+The prepush statement allows the user to supply stack management code that is
+written out during the generation of fcall, immediately before the current
+state is pushed to the stack. This statement can be used to test the number of
+available spaces and dynamically grow the stack if necessary.
+
+.section Post-Pop Statement
+.label{postpop}
+
+.verbatim
+postpop {
+ /* stack shrinking code */
+}
+.end verbatim
+
+The postpop statement allows the user to supply stack management code that is
+written out during the generation of fret, immediately after the next state is
+popped from the stack. This statement can be used to dynamically shrink the
+stack.
+
+.section Write Statement
+.label{write-statement}
+
+.verbatim
+write <component> [options];
+.end verbatim
+
+The write statement is used to generate parts of the machine.
+There are seven
+components that can be generated by a write statement. These components make up the
+state machine's data, initialization code, execution code, and export definitions.
+A write statement may appear before a machine is fully defined.
+This allows one to write out the data first then later define the machine where
+it is used. An example of this is shown in Figure .ref{fbreak-example}.
+
+.subsection Write Data
+.verbatim
+write data [options];
+.end verbatim
+
+The write data statement causes Ragel to emit the constant static data needed
+by the machine. In table-driven output styles (see Section .ref{genout}) this
+is a collection of arrays that represent the states and transitions of the
+machine. In goto-driven machines much less data is emitted. At the very
+minimum a start state .verb|name_start| is generated. All variables written
+out in machine data have both the .verb|static| and .verb|const| properties and
+are prefixed with the name of the machine and an
+underscore. The data can be placed inside a class, inside a function, or it can
+be defined as global data.
+
+Two variables are written that may be used to test the state of the machine
+after a buffer block has been processed. The .verb|name_error| variable gives
+the id of the state that the machine moves into when it cannot find a valid
+transition to take. The machine immediately breaks out of the processing loop when
+it finds itself in the error state. The error variable can be compared to the
+current state to determine if the machine has failed to parse the input. If the
+machine is complete, that is from every state there is a transition to a proper
+state on every possible character of the alphabet, then no error state is required
+and this variable will be set to -1.
+
+The .verb|name_first_final| variable stores the id of the first final state.
+All of the machine's states are sorted by their final state status before
+having their ids assigned. Checking if the machine has accepted its input can
+then be done by checking if the current state is greater-than or equal to the
+first final state.
+
+Data generation has several options:
+
+.list
+.li .verb|noerror | - Do not generate the integer variable that gives the id of the error state.
+.li .verb|nofinal | - Do not generate the integer variable that gives the id of the first final state.
+.li .verb|noprefix | - Do not prefix the variable names with the name of the machine.
+.end list
+
+.figure fbreak-example
+.verbatim
+#include <stdio.h>
+%% machine foo;
+%% write data;
+int main( int argc, char **argv )
+{
+ int cs, res = 0;
+ if ( argc > 1 ) {
+ char *p = argv[1];
+ %%{
+ main :=
+ [a-z]+
+ 0 @{ res = 1; fbreak; };
+ write init;
+ write exec noend;
+ }%%
+ }
+ printf("execute = %i\n", res );
+ return 0;
+}
+.end verbatim
+.caption Use of .tt{noend} write option and the .tt{fbreak} statement for
+processing a string.
+.end figure
+
+.subsection Write Start, First Final and Error
+
+.verbatim
+write start;
+write first_final;
+write error;
+.end verbatim
+
+These three write statements provide an alternative means of accessing the
+.verb|start|, .verb|first_final| and .verb|error| states. If there are many
+different machine specifications in one file it is easy to get the prefix for
+these wrong. This is especially true if the state machine boilerplate is
+frequently made by a copy-paste-edit process. These write statements allow the
+problem to be avoided. They can be used as follows:
+
+.verbatim
+/* Did parsing succeed? */
+if ( cs < %%{ write first_final; }%% ) {
+ result = ERR_PARSE_ERROR;
+ goto fail;
+}
+.end verbatim
+
+.subsection Write Init
+.verbatim
+write init [options];
+.end verbatim
+
+The write init statement causes Ragel to emit initialization code. This should
+be executed once before the machine is started. At a very minimum this sets the
+current state to the start state. If other variables are needed by the
+generated code, such as call stack variables or scanner management
+variables, they are also initialized here.
+
+The .verb|nocs| option to the write init statement will cause Ragel to skip
+initialization of the cs variable. This is useful if the user wishes to use
+custom logic to decide which state the specification should start in.
+
+.subsection Write Exec
+.verbatim
+write exec [options];
+.end verbatim
+
+The write exec statement causes Ragel to emit the state machine's execution code.
+Ragel expects several variables to be available to this code. At a very minimum, the
+generated code needs access to the current character position .verb|p|, the ending
+position .verb|pe| and the current state .verb|cs| (though .verb|pe|
+can be omitted using the .verb|noend| write option).
+The .verb|p| variable is the cursor that the execute code will
+use to traverse the input. The .verb|pe| variable should be set up to point to one
+position past the last valid character in the buffer.
+
+Other variables are needed when certain features are used. For example using
+the .verb|fcall| or .verb|fret| statements requires .verb|stack| and
+.verb|top| variables to be defined. If a longest-match construction is used,
+variables for managing backtracking are required.
+
+The write exec statement has one option. The .verb|noend| option tells Ragel
+to generate code that ignores the end position .verb|pe|. In this
+case the user must explicitly break out of the processing loop using
+.verb|fbreak|, otherwise the machine will continue to process characters until
+it moves into the error state. This option is useful if one wishes to process a
+null terminated string. Rather than traverse the string to discover the length
+before processing the input, the user can break out when the null character is
+seen. The example in Figure .ref{fbreak-example} shows the use of the
+.verb|noend| write option and the .verb|fbreak| statement for processing a string.
+
+.subsection Write Exports
+.label{export}
+
+.verbatim
+write exports;
+.end verbatim
+
+The export feature can be used to export simple machine definitions. Machine definitions
+are marked for export using the .verb|export| keyword.
+
+.verbatim
+export machine_to_export = 0x44;
+.end verbatim
+
+When the write exports statement is used these machines are
+written out in the generated code. Defines are used for C and constant integers
+are used for D, Java and Ruby. See Section .ref{import} for a description of the
+import statement.
+
+.section Maintaining Pointers to Input Data
+
+In the creation of any parser it is not uncommon to require the collection of
+the data being parsed. It is always possible to collect data into a growable
+buffer as the machine moves over it, however the copying of data is a somewhat
+wasteful use of processor cycles. The most efficient way to collect data from
+the parser is to set pointers into the input then later reference them. This
+poses a problem for uses of Ragel where the input data arrives in blocks, such
+as over a socket or from a file. If a pointer is set in one buffer block but
+must be used while parsing a following buffer block, some extra consideration
+to correctness must be made.
+
+The scanner constructions exhibit this problem, requiring the maintenance
+code described in Section .ref{generating-scanners}. If a longest-match
+construction has been used somewhere in the machine then it is possible to
+take advantage of the required prefix maintenance code in the driver program to
+ensure pointers to the input are always valid. If laying down a pointer one can
+set .verb|ts| at the same spot or ahead of it. When data is shifted in
+between loops the user must also shift the pointer. In this way it is possible
+to maintain pointers to the input that will always be consistent.
+
+.figure line-oriented
+.verbatim
+ int have = 0;
+ while ( 1 ) {
+ char *p, *pe, *data = buf + have;
+ int len, space = BUFSIZE - have;
+
+ if ( space == 0 ) {
+ fprintf(stderr, "BUFFER OUT OF SPACE\n");
+ exit(1);
+ }
+
+ len = fread( data, 1, space, stdin );
+ if ( len == 0 )
+ break;
+
+ /* Find the last newline by searching backwards. */
+ p = buf;
+ pe = data + len - 1;
+ while ( pe >= buf && *pe != '\n' )
+ pe--;
+ pe += 1;
+
+ %% write exec;
+
+ /* How much is still in the buffer? */
+ have = data + len - pe;
+ if ( have > 0 )
+ memmove( buf, pe, have );
+
+ if ( len < space )
+ break;
+ }
+.end verbatim
+.caption An example of line-oriented processing.
+.end figure
+
+In general, there are two approaches for guaranteeing the consistency of
+pointers to input data. The first approach is the one just described;
+lay down a marker from an action,
+then later ensure that the data the marker points to is preserved ahead of
+the buffer on the next execute invocation. This approach is good because it
+allows the parser to decide on the pointer-use boundaries, which can be
+arbitrarily complex parsing conditions. A downside is that it requires any
+pointers that are set to be corrected in between execute invocations.
+
+The alternative is to find the pointer-use boundaries before invoking the execute
+routine, then pass in the data using these boundaries. For example, if the
+program must perform line-oriented processing, the user can scan backwards from
+the end of an input block that has just been read in and process only up to the
+first found newline. On the next input read, the new data is placed after the
+partially read line and processing continues from the beginning of the line.
+An example of line-oriented processing is given in Figure .ref{line-oriented}.
+
+.section Specifying the Host Language
+
+The .verb|ragel| program has a number of options for specifying the host
+language. The host-language options are:
+
+.itemize
+.item .verb|-C | for C/C++/Objective-C code (default)
+.item .verb|-D | for D code.
+.item .verb|-Z | for Go code.
+.item .verb|-J | for Java code.
+.item .verb|-R | for Ruby code.
+.item .verb|-A | for C\# code.
+.end itemize
+
+.section Choosing a Generated Code Style
+.label{genout}
+
+There are three styles of code output to choose from. Code style affects the
+size and speed of the compiled binary. Changing code style does not require any
+change to the Ragel program. There are two table-driven formats and a goto
+driven format.
+
+In addition to choosing a style to emit, there are various levels of action
+code reuse to choose from. The maximum reuse levels (.verb|-T0|, .verb|-F0|
+and .verb|-G0|) ensure that no FSM action code is ever duplicated by encoding
+each transition's action list as static data and iterating
+through the lists on every transition. This will normally result in a smaller
+binary. The less action reuse options (.verb|-T1|, .verb|-F1| and .verb|-G1|)
+will usually produce faster running code by expanding each transition's action
+list into a single block of code, eliminating the need to iterate through the
+lists. This duplicates action code instead of generating the logic necessary
+for reuse. Consequently the binary will be larger. However, this tradeoff applies to
+machines with moderate to dense action lists only. If a machine's transitions
+frequently have less than two actions then the less reuse options will actually
+produce both a smaller and a faster running binary due to less action sharing
+overhead. The best way to choose the appropriate code style for your
+application is to perform your own tests.
+
+The table-driven FSM represents the state machine as constant static data. There are
+tables of states, transitions, indices and actions. The current state is
+stored in a variable. The execution is simply a loop that looks up the current
+state, looks up the transition to take, executes any actions and moves to the
+target state. In general, the table-driven FSM can handle any machine, produces
+a smaller binary and requires a less expensive host language compile, but
+results in slower running code. Since the table-driven format is the most
+flexible it is the default code style.
+
+The flat table-driven machine is a table-based machine that is optimized for
+small alphabets. Where the regular table machine uses the current character as
+the key in a binary search for the transition to take, the flat table machine
+uses the current character as an index into an array of transitions. This is
+faster in general, however is only suitable if the span of possible characters
+is small.
+
+The goto-driven FSM represents the state machine using goto and switch
+statements. The execution is a flat code block where the transition to take is
+computed using switch statements and directly executable binary searches. In
+general, the goto FSM produces faster code but results in a larger binary and a
+more expensive host language compile.
+
+The goto-driven format has an additional action reuse level (.verb|-G2|) that
+writes actions directly into the state transitioning logic rather than putting
+all the actions together into a single switch. Generally this produces faster
+running code because it allows the machine to encode the current state using
+the processor's instruction pointer. Again, sparse machines may actually
+compile to smaller binaries when .verb|-G2| is used due to less state and
+action management overhead. For many parsing applications .verb|-G2| is the
+preferred output format.
+
+.center
+
+Code Output Style Options
+
+.tabular
+.row .verb|-T0|&binary search table-driven&C/D/Java/Ruby/C\#
+.row .verb|-T1|&binary search, expanded actions&C/D/Ruby/C\#
+.row .verb|-F0|&flat table-driven&C/D/Ruby/C\#
+.row .verb|-F1|&flat table, expanded actions&C/D/Ruby/C\#
+.row .verb|-G0|&goto-driven&C/D/C\#
+.row .verb|-G1|&goto, expanded actions&C/D/C\#
+.row .verb|-G2|&goto, in-place actions&C/D/Go
+.end tabular
+.end center
+
+.chapter Beyond the Basic Model
+
+.section Parser Modularization
+.label{modularization}
+
+It is possible to use Ragel's machine construction and action embedding
+operators to specify an entire parser using a single regular expression. In
+many cases this is the desired way to specify a parser in Ragel. However, in
+some scenarios the language to parse may be so large that it is difficult to
+think about it as a single regular expression. It may also shift between distinct
+parsing strategies, in which case modularization into several coherent blocks
+of the language may be appropriate.
+
+It may also be the case that patterns that compile to a large number of states
+must be used in a number of different contexts and referencing them in each
+context results in a very large state machine. In this case, an ability to reuse
+parsers would reduce code size.
+
+To address this, distinct regular expressions may be instantiated and linked
+together by means of a jumping and calling mechanism. This mechanism is
+analogous to the jumping to and calling of processor instructions. A jump
+command, given in action code, causes control to be immediately passed to
+another portion of the machine by way of setting the current state variable. A
+call command causes the target state of the current transition to be pushed to
+a state stack before control is transferred. Later on, the original location
+may be returned to with a return statement. In the following example, distinct
+state machines are used to handle the parsing of two types of headers.
+
+% GENERATE: call
+% %%{
+% machine call;
+.code
+action return { fret; }
+action call_date { fcall date; }
+action call_name { fcall name; }
+
+# A parser for date strings.
+date := [0-9][0-9] '/'
+ [0-9][0-9] '/'
+ [0-9][0-9][0-9][0-9] '\n' @return;
+
+# A parser for name strings.
+name := ( [a-zA-Z]+ | ' ' )** '\n' @return;
+
+# The main parser.
+headers =
+ ( 'from' | 'to' ) ':' @call_name |
+ ( 'departed' | 'arrived' ) ':' @call_date;
+
+main := headers*;
+.end code
+% }%%
+% %% write data;
+% void f()
+% {
+% %% write init;
+% %% write exec;
+% }
+% END GENERATE
+
+Calling and jumping should be used carefully as they are operations that take
+one out of the domain of regular languages. A machine that contains a call or
+jump statement in one of its actions should be used as an argument to a machine
+construction operator only with considerable care. Since DFA transitions may
+actually represent several NFA transitions, a call or jump embedded in one
+machine can inadvertently terminate another machine that it shares prefixes
+with. Despite this danger, these statements have proven useful for tying
+together sub-parsers of a language into a parser for the full language,
+especially for the purpose of modularizing code and reducing the number of
+states when the machine contains frequently recurring patterns.
+
+Section .ref{vals} describes the jump and call statements that are used to
+transfer control. These statements make use of two variables that must be
+declared by the user, .verb|stack| and .verb|top|. The .verb|stack| variable
+must be an array of integers and .verb|top| must be a single integer, which
+will point to the next available space in .verb|stack|. Sections .ref{prepush}
+and .ref{postpop} describe the Pre-Push and Post-Pop statements which can be
+used to implement a dynamically resizable array.
+
+.section Referencing Names
+.label{labels}
+
+This section describes how to reference names in epsilon transitions (Section
+.ref{state-charts}) and
+action-based control-flow statements such as .verb|fgoto|. There is a hierarchy
+of names implied in a Ragel specification. At the top level are the machine
+instantiations. Beneath the instantiations are labels and references to machine
+definitions. Beneath those are more labels and references to definitions, and
+so on.
+
+Any name reference may contain multiple components separated with the .verb|::|
+compound symbol. The search for the first component of a name reference is
+rooted at the join expression that the epsilon transition or action embedding
+is contained in. If the name reference is not contained in a join,
+the search is rooted at the machine definition that the epsilon transition or
+action embedding is contained in. Each component after the first is searched
+for beginning at the location in the name tree that the previous reference
+component refers to.
+
+In the case of action-based references, if the action is embedded more than
+once, the local search is performed for each embedding and the result is the
+union of all the searches. If no result is found for action-based references then
+the search is repeated at the root of the name tree. Any action-based name
+search may be forced into a strictly global search by prefixing the name
+reference with .verb|::|.
+
+The final component of the name reference must resolve to a unique entry point.
+If a name is unique in the entire name tree it can be referenced as is. If it
+is not unique it can be specified by qualifying it with names above it in the
+name tree. However, it can always be renamed.
+
+% FIXME: Should fit this in somewhere.
+% Some kinds of name references are illegal. Cannot call into longest-match
+% machine, can only call its start state. Cannot make a call to anywhere from
+% any part of a longest-match machine except a rule's action. This would result
+% in an eventual return to some point inside a longest-match other than the
+% start state. This is banned for the same reason a call into the LM machine is
+% banned.
+
+
+.section Scanners
+.label{generating-scanners}
+
+Scanners are very much intertwined with regular-languages and their
+corresponding processors. For this reason Ragel supports the definition of
+scanners. The generated code will repeatedly attempt to match patterns from a
+list, favouring longer patterns over shorter patterns. In the case of
+equal-length matches, the generated code will favour patterns that appear ahead
+of others. When a scanner makes a match it executes the user code associated
+with the match, consumes the input then resumes scanning.
+
+.verbatim
+<machine_name> := |*
+ pattern1 => action1;
+ pattern2 => action2;
+ ...
+ *|;
+.end verbatim
+
+On the surface, Ragel scanners are similar to those defined by Lex. Though
+there is a key distinguishing feature: patterns may be arbitrary Ragel
+expressions and can therefore contain embedded code. With a Ragel-based scanner
+the user need not wait until the end of a pattern before user code can be
+executed.
+
+Scanners can be used to process sub-languages, as well as for tokenizing
+programming languages. In the following example a scanner is used to tokenize
+the contents of a header field.
+
+.code
+word = [a-z]+;
+head_name = 'Header';
+
+header := |*
+ word;
+ ' ';
+ '\n' => { fret; };
+*|;
+
+main := ( head_name ':' @{ fcall header; } )*;
+.end code
+
+The scanner construction has a purpose similar to the longest-match kleene star
+operator .verb|**|. The key
+difference is that a scanner is able to backtrack to match a previously matched
+shorter string when the pursuit of a longer string fails. For this reason the
+scanner construction operator is not a pure state machine construction
+operator. It relies on several variables that enable it to backtrack and make
+pointers to the matched input text available to the user. For this reason
+scanners must be immediately instantiated. They cannot be defined inline or
+referenced by another expression. Scanners must be jumped to or called.
+
+Scanners rely on the .verb|ts|, .verb|te| and .verb|act|
+variables to be present so that they can backtrack and make pointers to the
+matched text available to the user. If input is processed using multiple calls
+to the execute code then the user must ensure that when a token is only
+partially matched that the prefix is preserved on the subsequent invocation of
+the execute code.
+
+The .verb|ts| variable must be defined as a pointer to the input data.
+It is used for recording where the current token match begins. This variable
+may be used in action code for retrieving the text of the current match. Ragel
+ensures that in between tokens and outside of the longest-match machines that
+this pointer is set to null. In between calls to the execute code the user must
+check if .verb|ts| is set and if so, ensure that the data it points to is
+preserved ahead of the next buffer block. This is described in more detail
+below.
+
+The .verb|te| variable must also be defined as a pointer to the input data.
+It is used for recording where a match ends and where scanning of the next
+token should begin. This can also be used in action code for retrieving the
+text of the current match.
+
+The .verb|act| variable must be defined as an integer type. It is used for
+recording the identity of the last pattern matched when the scanner must go
+past a matched pattern in an attempt to make a longer match. If the longer
+match fails it may need to consult the .verb|act| variable. In some cases, use
+of the .verb|act|
+variable can be avoided because the value of the current state is enough
+information to determine which token to accept, however in other cases this is
+not enough and so the .verb|act| variable is used.
+
+When the longest-match operator is in use, the user's driver code must take on
+some buffer management functions. The following algorithm gives an overview of
+the steps that should be taken to properly use the longest-match operator.
+
+.itemize
+.item Read a block of input data.
+.item Run the execute code.
+.item If .verb|ts| is set, the execute code will expect the incomplete
+token to be preserved ahead of the buffer on the next invocation of the execute
+code.
+.itemize
+.item Shift the data beginning at .verb|ts| and ending at .verb|pe| to the
+beginning of the input buffer.
+.item Reset .verb|ts| to the beginning of the buffer.
+.item Shift .verb|te| by the distance from the old value of .verb|ts|
+to the new value. The .verb|te| variable may or may not be valid. There is
+no way to know if it holds a meaningful value because it is not kept at null
+when it is not in use. It can be shifted regardless.
+.end itemize
+.item Read another block of data into the buffer, immediately following any
+preserved data.
+.item Run the scanner on the new data.
+.end itemize
+
+Figure .ref{preserve_example} shows the required handling of an input stream in
+which a token is broken by the input block boundaries. After processing up to
+and including the ``t'' of ``characters'', the prefix of the string token must be
+retained and processing should resume at the ``e'' on the next iteration of
+the execute code.
+
+If one uses a large input buffer for collecting input then the number of times
+the shifting must be done will be small. Furthermore, if one takes care not to
+define tokens that are allowed to be very long and instead processes these
+items using pure state machines or sub-scanners, then only a small amount of
+data will ever need to be shifted.
+
+.figure preserve_example
+.verbatim
+ a) A stream "of characters" to be scanned.
+ | | |
+ p ts pe
+
+ b) "of characters" to be scanned.
+ | | |
+ ts p pe
+.end verbatim
+.caption Following an invocation of the execute code there may be a partially
+matched token (a). The data of the partially matched token
+must be preserved ahead of the new data on the next invocation (b).
+.end figure
+
+Since scanners attempt to make the longest possible match of input, patterns
+such as identifiers require one character of lookahead in order to trigger a
+match. In the case of the last token in the input stream the user must ensure
+that the .verb|eof| variable is set so that the final token is flushed out.
+
+An example scanner processing loop is given in Figure .ref{scanner-loop}.
+
+.figure scanner-loop
+.verbatim
+ int have = 0;
+ bool done = false;
+ while ( !done ) {
+ /* How much space is in the buffer? */
+ int space = BUFSIZE - have;
+ if ( space == 0 ) {
+ /* Buffer is full. */
+ cerr << "TOKEN TOO BIG" << endl;
+ exit(1);
+ }
+
+ /* Read in a block after any data we already have. */
+ char *p = inbuf + have;
+ cin.read( p, space );
+ int len = cin.gcount();
+
+ char *pe = p + len;
+ char *eof = 0;
+
+ /* If no data was read indicate EOF. */
+ if ( len == 0 ) {
+ eof = pe;
+ done = true;
+ }
+
+ %% write exec;
+
+ if ( cs == Scanner_error ) {
+ /* Machine failed before finding a token. */
+ cerr << "PARSE ERROR" << endl;
+ exit(1);
+ }
+
+ if ( ts == 0 )
+ have = 0;
+ else {
+ /* There is a prefix to preserve, shift it over. */
+ have = pe - ts;
+ memmove( inbuf, ts, have );
+ te = inbuf + (te-ts);
+ ts = inbuf;
+ }
+ }
+.end verbatim
+.caption A processing loop for a scanner.
+.end figure
+
+.section State Charts
+.label{state-charts}
+
+In addition to supporting the construction of state machines using regular
+languages, Ragel provides a way to manually specify state machines using
+state charts. The comma operator combines machines together without any
+implied transitions. The user can then manually link machines by specifying
+epsilon transitions with the .verb|->| operator. Epsilon transitions are drawn
+between the final states of a machine and entry points defined by labels. This
+makes it possible to build machines using the explicit state-chart method while
+making minimal changes to the Ragel language.
+
+An interesting feature of Ragel's state chart construction method is that it
+can be mixed freely with regular expression constructions. A state chart may be
+referenced from within a regular expression, or a regular expression may be
+used in the definition of a state chart transition.
+
+.subsection Join
+
+.verb|expr , expr , ...|
+
+Join a list of machines together without
+drawing any transitions, without setting up a start state, and without
+designating any final states. Transitions between the machines may be specified
+using labels and epsilon transitions. The start state must be explicitly
+specified with the ``start'' label. Final states may be specified with an
+epsilon transition to the implicitly created ``final'' state. The join
+operation allows one to build machines using a state chart model.
+
+.subsection Label
+
+.verb|label: expr|
+
+Attaches a label to an expression. Labels can be
+used as the target of epsilon transitions and explicit control transfer
+statements such as .verb|fgoto| and .verb|fnext| in action
+code.
+
+.subsection Epsilon
+
+.verb|expr -> label|
+
+Draws an epsilon transition to the state defined
+by .verb|label|. Epsilon transitions are made deterministic when join
+operators are evaluated. Epsilon transitions that are not in a join operation
+are made deterministic when the machine definition that contains the epsilon is
+complete. See Section .ref{labels} for information on referencing labels.
+
+.subsection Simplifying State Charts
+
+There are two benefits to providing state charts in Ragel. The first is that it
+allows us to take a state chart with a full listing of states and transitions
+and simplify it in selective places using regular expressions.
+
+The state chart method of specifying parsers is very common. It is an
+effective programming technique for producing robust code. The key disadvantage
+becomes clear when one attempts to comprehend a large parser specified in this
+way. These programs usually require many lines, causing logic to be spread out
+over large distances in the source file. Remembering the function of a large
+number of states can be difficult and organizing the parser in a sensible way
+requires discipline because branches and repetition present many file layout
+options. This kind of programming takes a specification with inherent
+structure such as looping, alternation and concatenation and expresses it in a
+flat form.
+
+If we could take an isolated component of a manually programmed state chart,
+that is, a subset of states that has only one entry point, and implement it
+using regular language operators then we could eliminate all the explicit
+naming of the states contained in it. By eliminating explicitly named states
+and replacing them with higher-level specifications we simplify a state machine
+specification.
+
+For example, sometimes chains of states are needed, with only a small number of
+possible characters appearing along the chain. These can easily be replaced
+with a concatenation of characters. Sometimes a group of common states
+implement a loop back to another single portion of the machine. Rather than
+manually duplicate all the transitions that loop back, we may be able to
+express the loop using a kleene star operator.
+
+Ragel allows one to take this state map simplification approach. We can build
+state machines using a state map model and implement portions of the state map
+using regular languages. In place of any transition in the state machine,
+entire sub-machines can be given. These can encapsulate functionality
+defined elsewhere. An important aspect of the Ragel approach is that when we
+wrap up a collection of states using a regular expression we do not lose
+access to the states and transitions. We can still execute code on the
+transitions that we have encapsulated.
+
+.subsection Dropping Down One Level of Abstraction
+.label{down}
+
+The second benefit of incorporating state charts into Ragel is that it permits
+us to bypass the regular language abstraction if we need to. Ragel's action
+embedding operators are sometimes insufficient for expressing certain parsing
+tasks. In the same way that it is useful for C language programmers to drop down
+to assembly language programming using embedded assembler, it is sometimes
+useful for the Ragel programmer to drop down to programming with state charts.
+
+In the following example, we wish to buffer the characters of an XML CDATA
+sequence. The sequence is terminated by the string .verb|]]>|. The challenge
+in our application is that we do not wish the terminating characters to be
+buffered. An expression of the form .verb|any* @buffer :>> ']]>'| will not work
+because the buffer will always contain the characters .verb|]]| on the end.
+Instead, what we need is to delay the buffering of .verb|]|
+characters until a time when we
+abandon the terminating sequence and go back into the main loop. There is no
+easy way to express this using Ragel's regular expression and action embedding
+operators, and so an ability to drop down to the state chart method is useful.
+
+% GENERATE: dropdown
+% OPT: -p
+% %%{
+% machine dropdown;
+.code
+action bchar { buff( fpc ); } # Buffer the current character.
+action bbrack1 { buff( "]" ); }
+action bbrack2 { buff( "]]" ); }
+
+CDATA_body =
+start: (
+ ']' -> one |
+ (any-']') @bchar ->start
+),
+one: (
+ ']' -> two |
+ [^\]] @bbrack1 @bchar ->start
+),
+two: (
+ '>' -> final |
+ ']' @bbrack1 -> two |
+ [^>\]] @bbrack2 @bchar ->start
+);
+.end code
+% main := CDATA_body;
+% }%%
+% END GENERATE
+
+.graphic dropdown
+
+
+.section Semantic Conditions
+.label{semantic}
+
+Many communication protocols contain variable-length fields, where the length
+of the field is given ahead of the field as a value. This
+problem cannot be expressed using regular languages because of its
+context-dependent nature. The prevalence of variable-length fields in
+communication protocols motivated us to introduce semantic conditions into
+the Ragel language.
+
+A semantic condition is a block of user code that is interpreted as an
+expression and evaluated immediately
+before a transition is taken. If the code returns a value of true, the
+transition may be taken. We can now embed code that extracts the length of a
+field, then proceed to match $n$ data values.
+
+% GENERATE: conds1
+% OPT: -p
+% %%{
+% machine conds1;
+% number = digit+;
+.code
+action rec_num { i = 0; n = getnumber(); }
+action test_len { i++ < n }
+data_fields = (
+ 'd'
+ [0-9]+ %rec_num
+ ':'
+ ( [a-z] when test_len )*
+)**;
+.end code
+% main := data_fields;
+% }%%
+% END GENERATE
+
+.graphic conds1
+
+The Ragel implementation of semantic conditions does not force us to give up the
+compositional property of Ragel definitions. For example, a machine that tests
+the length of a field using conditions can be unioned with another machine
+that accepts some of the same strings, without the two machines interfering with
+one another. The user need not be concerned about whether or not the result of the
+semantic condition will affect the matching of the second machine.
+
+To see this, first consider that when a user associates a condition with an
+existing transition, the transition's label is translated from the base character
+to its corresponding value in the space that represents ``condition $c$ true''. Should
+the determinization process combine a state that has a conditional transition
+with another state that has a transition on the same input character but
+without a condition, then the condition-less transition first has its label
+translated into two values, one to its corresponding value in the space that
+represents ``condition $c$ true'' and another to its corresponding value in the
+space that represents ``condition $c$ false''. It
+is then safe to combine the two transitions. This is shown in the following
+example. Two intersecting patterns are unioned, one with a condition and one
+without. The condition embedded in the first pattern does not affect the second
+pattern.
+
+% GENERATE: conds2
+% OPT: -p
+% %%{
+% machine conds2;
+% number = digit+;
+.code
+action test_len { i++ < n }
+action one { /* accept pattern one */ }
+action two { /* accept pattern two */ }
+patterns =
+ ( [a-z] when test_len )+ %one |
+ [a-z][a-z0-9]* %two;
+main := patterns '\n';
+.end code
+% }%%
+% END GENERATE
+
+.graphic conds2
+
+There are many more potential uses for semantic conditions. The user is free to
+use arbitrary code and may therefore perform actions such as looking up names
+in dictionaries, validating input using external parsing mechanisms or
+performing checks on the semantic structure of input seen so far. In the next
+section we describe how Ragel accommodates several common parser engineering
+problems.
+
+The semantic condition feature works only with alphabet types that are smaller
+in width than the .verb|long| type. To implement semantic conditions Ragel
+needs to be able to allocate characters from the alphabet space. Ragel uses
+these allocated characters to express "character C with condition P true" or "C
+with P false." Since internally Ragel uses longs to store characters there is
+no room left in the alphabet space unless an alphabet type smaller than long is
+used.
+
+.section Implementing Lookahead
+
+There are a few strategies for implementing lookahead in Ragel programs.
+Leaving actions, which are described in Section .ref{out-actions}, can be
+used as a form of lookahead. Ragel also provides the .verb|fhold| directive
+which can be used in actions to prevent the machine from advancing over the
+current character. It is also possible to manually adjust the current character
+position by shifting it backwards using .verb|fexec|, however when this is
+done, care must be taken not to overstep the beginning of the current buffer
+block. In both the use of .verb|fhold| and .verb|fexec| the user must be
+cautious of combining the resulting machine with another in such a way that the
+transition on which the current position is adjusted is not combined with a
+transition from the other machine.
+
+.section Parsing Recursive Language Structures
+
+In general Ragel cannot handle recursive structures because the grammar is
+interpreted as a regular language. However, depending on what needs to be
+parsed it is sometimes practical to implement the recursive parts using manual
+coding techniques. This often works in cases where the recursive structures are
+simple and easy to recognize, such as in the balancing of parentheses.
+
+One approach to parsing recursive structures is to use actions that increment
+and decrement counters or otherwise recognize the entry to and exit from
+recursive structures and then jump to the appropriate machine definition using
+.verb|fcall| and .verb|fret|. Alternatively, semantic conditions can be used to
+test counter variables.
+
+A more traditional approach is to call a separate parsing function (expressed
+in the host language) when a recursive structure is entered, then later return
+when the end is recognized.
+##### EXP #####
+\documentclass[letterpaper,11pt,oneside]{book}
+\usepackage{graphicx}
+\usepackage{comment}
+\usepackage{multicol}
+\usepackage[
+ colorlinks=true,
+ linkcolor=black,
+ citecolor=green,
+ filecolor=black,
+ urlcolor=black]{hyperref}
+
+\topmargin -0.20in
+\oddsidemargin 0in
+\textwidth 6.5in
+\textheight 9in
+
+\setlength{\parskip}{0pt}
+\setlength{\topsep}{0pt}
+\setlength{\partopsep}{0pt}
+\setlength{\itemsep}{0pt}
+
+\input{version}
+
+\newcommand{\verbspace}{\vspace{10pt}}
+\newcommand{\graphspace}{\vspace{10pt}}
+
+\renewcommand\floatpagefraction{.99}
+\renewcommand\topfraction{.99}
+\renewcommand\bottomfraction{.99}
+\renewcommand\textfraction{.01}
+\setcounter{totalnumber}{50}
+\setcounter{topnumber}{50}
+\setcounter{bottomnumber}{50}
+
+\newenvironment{inline_code}{\def\baselinestretch{1}\vspace{12pt}\small}{}
+
+\begin{document}
+
+\thispagestyle{empty}
+\begin{center}
+\vspace*{3in}
+{\huge Ragel State Machine Compiler}\\
+\vspace*{12pt}
+{\Large User Guide}\\
+\vspace{1in}
+by\\
+\vspace{12pt}
+{\large Adrian Thurston}\\
+\end{center}
+\clearpage
+
+\pagenumbering{roman}
+
+\chapter*{License}
+Ragel version \version, \pubdate\\
+Copyright \copyright\ 2003-2012 Adrian D. Thurston
+\vspace{6mm}
+
+{\bf\it\noindent This document is part of Ragel, and as such, this document is
+released under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+}
+
+\vspace{5pt}
+
+{\bf\it\noindent Ragel is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+details.
+}
+
+\vspace{5pt}
+
+{\bf\it\noindent You should have received a copy of the GNU General Public
+License along with Ragel; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+}
+
+\clearpage
+\tableofcontents
+\clearpage
+
+\pagenumbering{arabic}
+\chapter{Introduction}
+
+\section{Abstract}
+
+Regular expressions are used heavily in practice for the purpose of specifying
+parsers. They are normally used as black boxes linked together with program
+logic. User actions are executed in between invocations of the regular
+expression engine. Adding actions before a pattern terminates requires patterns
+to be broken and pasted back together with program logic. The more user actions
+are needed, the less the advantages of regular expressions are seen.
+
+Ragel is a software development tool that allows user actions to be
+embedded into the transitions of a regular expression's corresponding state
+machine, eliminating the need to switch from the regular expression engine and
+user code execution environment and back again. As a result, expressions can be
+maximally continuous. One is free to specify an entire parser using a single
+regular expression. The single-expression model affords concise and elegant
+descriptions of languages and the generation of very simple, fast and robust
+code. Ragel compiles executable finite state machines from a high level regular language
+notation. Ragel targets C, C++, Objective-C, D, Go, Java and Ruby.
+
+In addition to building state machines from regular expressions, Ragel allows
+the programmer to directly specify state machines with state charts. These two
+notations may be freely combined. There are also facilities for controlling
+nondeterminism in the resulting machines and building scanners using patterns
+that themselves have embedded actions. Ragel can produce code that is small and
+runs very fast. Ragel can handle integer-sized alphabets and can compile very
+large state machines.
+
+\section{Motivation}
+
+When a programmer is faced with the task of producing a parser for a
+context-free language there are many tools to choose from. It is quite common
+to generate useful and efficient parsers for programming languages from a
+formal grammar. It is also quite common for programmers to avoid such tools
+when making parsers for simple computer languages, such as file formats and
+communication protocols. Such languages are often regular and tools for
+processing the context-free languages are viewed as too heavyweight for the
+purpose of parsing regular languages. The extra run-time effort required for
+supporting the recursive nature of context-free languages is wasted.
+
+When we turn to the regular expression-based parsing tools, such as Lex, Re2C,
+and scripting languages such as Sed, Awk and Perl we find that they are split
+into two levels: a regular expression matching engine and some kind of program
+logic for linking patterns together. For example, a Lex program is composed of
+sets of regular expressions. The implied program logic repeatedly attempts to
+match a pattern in the current set. When a match is found the associated user
+code is executed. It requires the user to consider a language as a sequence of
+independent tokens. Scripting languages and regular expression libraries allow
+one to link patterns together using arbitrary program code. This is very
+flexible and powerful, however we can be more concise and clear if we avoid
+gluing together regular expressions with if statements and while loops.
+
+This model of execution, where the runtime alternates between regular
+expression matching and user code execution places restrictions on when
+action code may be executed. Since action code can only be associated with
+complete patterns, any action code that must be executed before an entire
+pattern is matched requires that the pattern be broken into smaller units.
+Instead of being forced to disrupt the regular expression syntax and write
+smaller expressions, it is desirable to retain a single expression and embed
+code for performing actions directly into the transitions that move over the
+characters. After all, capable programmers are astutely aware of the machinery
+underlying their programs, so why not provide them with access to that
+machinery? To achieve this we require an action execution model for associating
+code with the sub-expressions of a regular expression in a way that does not
+disrupt its syntax.
+
+The primary goal of Ragel is to provide developers with an ability to embed
+actions into the transitions and states of a regular expression's state machine
+in support of the definition of entire parsers or large sections of parsers
+using a single regular expression. From the regular expression we gain a clear
+and concise statement of our language. From the state machine we obtain a very
+fast and robust executable that lends itself to many kinds of analysis and
+visualization.
+
+\section{Overview}
+
+Ragel is a language for specifying state machines. The Ragel program is a
+compiler that assembles a state machine definition to executable code. Ragel
+is based on the principle that any regular language can be converted to a
+deterministic finite state automaton. Since every regular language has a state
+machine representation and vice versa, the terms regular language and state
+machine (or just machine) will be used interchangeably in this document.
+
+Ragel outputs machines to C, C++, Objective-C, D, Go, Java or Ruby code. The output is
+designed to be generic and is not bound to any particular input or processing
+method. A Ragel machine expects to have data passed to it in buffer blocks.
+When there is no more input, the machine can be queried for acceptance. In
+this way, a Ragel machine can be used to simply recognize a regular language
+like a regular expression library. By embedding code into the regular language,
+a Ragel machine can also be used to parse input.
+
+The Ragel language has many operators for constructing and manipulating
+machines. Machines are built up from smaller machines, to bigger ones, to the
+final machine representing the language that needs to be recognized or parsed.
+
+The core state machine construction operators are those found in most theory
+of computation textbooks. They date back to the 1950s and are widely studied.
+They are based on set operations and permit one to think of languages as a set
+of strings. They are Union, Intersection, Difference, Concatenation and Kleene
+Star. Put together, these operators make up what most people know as regular
+expressions. Ragel also provides a scanner construction operator
+and provides operators for explicitly constructing machines
+using a state chart method. In the state chart method, one joins machines
+together without any implied transitions and then explicitly specifies where
+epsilon transitions should be drawn.
+
+The state machine manipulation operators are specific to Ragel. They allow the
+programmer to access the states and transitions of a regular language's
+corresponding machine. There are two uses of the manipulation operators. The
+first and primary use is to embed code into transitions and states, allowing
+the programmer to specify the actions of the state machine.
+
+Ragel attempts to make the action embedding facility as intuitive as possible.
+To do so, a number of issues need to be addressed. For example, when making a
+nondeterministic specification into a DFA using machines that have embedded
+actions, new transitions are often made that have the combined actions of
+several source transitions. Ragel ensures that multiple actions associated with
+a single transition are ordered consistently with respect to the order of
+reference and the natural ordering implied by the construction operators.
+
+The second use of the manipulation operators is to assign priorities to
+transitions. Priorities provide a convenient way of controlling any
+nondeterminism introduced by the construction operators. Suppose two
+transitions leave from the same state and go to distinct target states on the
+same character. If these transitions are assigned conflicting priorities, then
+during the determinization process the transition with the higher priority will
+take precedence over the transition with the lower priority. The lower priority
+transition gets abandoned. The transitions would otherwise be combined into a new
+transition that goes to a new state that is a combination of the original
+target states. Priorities are often required for segmenting machines. The most
+common uses of priorities have been encoded into a set of simple operators
+that should be used instead of priority embeddings whenever possible.
+
+For the purposes of embedding, Ragel divides transitions and states into
+different classes. There are four operators for embedding actions and
+priorities into the transitions of a state machine. It is possible to embed
+into entering transitions, finishing transitions, all transitions and leaving
+transitions. The embedding into leaving transitions is a special case.
+These transition embeddings get stored in the final states of a machine. They
+are transferred to any transitions that are made going out of the machine by
+future concatenation or Kleene star operations.
+
+There are several more operators for embedding actions into states. Like the
+transition embeddings, there are various different classes of states that the
+embedding operators access. For example, one can access start states, final
+states or all states, among others. Unlike the transition embeddings, there are
+several different types of state action embeddings. These are executed at
+various different times during the processing of input. It is possible to embed
+actions that are executed on transitions into a state, on transitions out of a
+state, on transitions taken on the error event, or on transitions taken on the
+EOF event.
+
+Within actions, it is possible to influence the behaviour of the state machine.
+The user can write action code that jumps or calls to another portion of the
+machine, changes the current character being processed, or breaks out of the
+processing loop. With the state machine calling feature Ragel can be used to
+parse languages that are not regular. For example, one can parse balanced
+parentheses by calling into a parser when an open parenthesis character is seen
+and returning to the state on the top of the stack when the corresponding
+closing parenthesis character is seen. More complicated context-free languages
+such as expressions in C are out of the scope of Ragel.
+
+Ragel also provides a scanner construction operator that can be used to build
+scanners much the same way that Lex is used. The Ragel generated code, which
+relies on user-defined variables for backtracking, repeatedly tries to match
+patterns to the input, favouring longer patterns over shorter ones and patterns
+that appear ahead of others when the lengths of the possible matches are
+identical. When a pattern is matched the associated action is executed.
+
+The key distinguishing feature between scanners in Ragel and scanners in Lex is
+that Ragel patterns may be arbitrary Ragel expressions and can therefore
+contain embedded code. With a Ragel-based scanner the user need not wait until
+the end of a pattern before user code can be executed.
+
+Scanners do take Ragel out of the domain of pure state machines and require the
+user to maintain the backtracking related variables. However, scanners
+integrate well with regular state machine instantiations. They can be called to
+or jumped to only when needed, or they can be called out of or jumped out of
+when a simpler, pure state machine model is appropriate.
+
+Two types of output code style are available. Ragel can produce a table-driven
+machine or a directly executable machine. The directly executable machine is
+much faster than the table-driven. On the other hand, the table-driven machine
+is more compact and less demanding on the host language compiler. It is better
+suited to compiling large state machines.
+
+\section{Related Work}
+
+Lex is perhaps the best-known tool for constructing parsers from regular
+expressions. In the Lex processing model, generated code attempts to match one
+of the user's regular expression patterns, favouring longer matches over
+shorter ones. Once a match is made it then executes the code associated with
+the pattern and consumes the matching string. This process is repeated until
+the input is fully consumed.
+
+Through the use of start conditions, related sets of patterns may be defined.
+The active set may be changed at any time. This allows the user to define
+different lexical regions. It also allows the user to link patterns together by
+requiring that some patterns come before others. This is quite like a
+concatenation operation. However, use of Lex for languages that require a
+considerable amount of pattern concatenation is inappropriate. In such cases a
+Lex program deteriorates into a manually specified state machine, where start
+conditions define the states and pattern actions define the transitions. Lex
+is therefore best suited to parsing tasks where the language to be parsed can
+be described in terms of regions of tokens.
+
+Lex is useful in many scenarios and has undoubtedly stood the test of time.
+There are, however, several drawbacks to using Lex. Lex can impose too much
+overhead for parsing applications where buffering is not required because all
+the characters are available in a single string. In these cases there is
+structure to the language to be parsed and a parser specification tool can
+help, but employing a heavyweight processing loop that imposes a stream
+``pull'' model and dynamic input buffer allocation is inappropriate. An
+example of this kind of scenario is the conversion of floating point numbers
+contained in a string to their corresponding numerical values.
+
+Another drawback is the very issue that Ragel attempts to solve.
+It is not possible to execute a user action while
+matching a character contained inside a pattern. For example, if scanning a
+programming language and string literals can contain newlines which must be
+counted, a Lex user must break up a string literal pattern so as to associate
+an action with newlines. This forces the definition of a new start condition.
+Alternatively the user can reprocess the text of the matched string literal to
+count newlines.
+
+
+The Re2C program defines an input processing model similar to that of Lex.
+Re2C focuses on making generated state machines run very fast and
+integrate easily into any program, free of dependencies. Re2C generates
+directly executable code and is able to claim that generated parsers run nearly
+as fast as their hand-coded equivalents. This is very important for user
+adoption, as programmers are reluctant to use a tool when a faster alternative
+exists. A consideration to ease of use is also important because developers
+need the freedom to integrate the generated code as they see fit.
+
+Many scripting languages provide ways of composing parsers by linking regular
+expressions using program logic. For example, Sed and Awk are two established
+Unix scripting tools that allow the programmer to exploit regular expressions
+for the purpose of locating and extracting text of interest. High-level
+programming languages such as Perl, Python, PHP and Ruby all provide regular
+expression libraries that allow the user to combine regular expressions with
+arbitrary code.
+
+In addition to supporting the linking of regular expressions with arbitrary
+program logic, the Perl programming language permits the embedding of code into
+regular expressions. Perl embeddings do not translate into the embedding of
+code into deterministic state machines. Perl regular expressions are in fact
+not fully compiled to deterministic machines when embedded code is involved.
+They are instead interpreted and involve backtracking. This is shown by the
+following Perl program. When it is fed the input \verb|abcd| the interpreter
+attempts to match the first alternative, printing \verb|a1 b1|. When this
+possibility fails it backtracks and tries the second possibility, printing
+\verb|a2 b2|, at which point it succeeds.
+
+\begin{inline_code}
+\begin{verbatim}
+print "YES\n" if ( <STDIN> =~
+ /( a (?{ print "a1 "; }) b (?{ print "b1 "; }) cX ) |
+ ( a (?{ print "a2 "; }) b (?{ print "b2 "; }) cd )/x )
+\end{verbatim}
+\end{inline_code}
+\verbspace
+
+In Ragel there is no regular expression interpreter. Aside from the scanner
+operator, all Ragel expressions are made into deterministic machines and the
+run time simply moves from state to state as it consumes input. An equivalent
+parser expressed in Ragel would attempt both of the alternatives concurrently,
+printing \verb|a1 a2 b1 b2|.
+
+\section{Development Status}
+
+Ragel is a relatively new tool and is under continuous development. As a rough
+release guide, minor revision number changes are for implementation
+improvements and feature additions. Major revision number changes are for
+implementation and language changes that do not preserve backwards
+compatibility. Though in the past this has not always held true: changes that
+break code have crept into minor version number changes. Typically, the
+documentation lags behind the development in the interest of documenting only
+the lasting features. The latest changes are always documented in the ChangeLog
+file.
+
+\chapter{Constructing State Machines}
+
+\section{Ragel State Machine Specifications}
+
+A Ragel input file consists of a program in the host language that contains embedded machine
+specifications. Ragel normally passes input straight to output. When it sees
+a machine specification it stops to read the Ragel statements and possibly generate
+code in place of the specification.
+Afterwards it continues to pass input through. There
+can be any number of FSM specifications in an input file. A multi-line FSM spec
+starts with \verb|%%{| and ends with \verb|}%%|. A single-line FSM spec starts
+with \verb|%%| and ends at the first newline.
+
+While Ragel is looking for FSM specifications it does basic lexical analysis on
+the surrounding input. It interprets literal strings and comments so a
+\verb|%%| sequence in either of those will not trigger the parsing of an FSM
+specification. Ragel does not pass the input through any preprocessor nor does it
+interpret preprocessor directives itself so includes, defines and ifdef logic
+cannot be used to alter the parse of a Ragel input file. It is therefore not
+possible to use an \verb|#if 0| directive to comment out a machine as is
+commonly done in C code. As an alternative, a machine can be prevented from
+causing any generated output by commenting out write statements.
+
+In Figure \ref{cmd-line-parsing}, a multi-line specification is used to define the
+machine and single line specifications are used to trigger the writing of the machine
+data and execution code.
+
+\begin{figure}
+\small
+\begin{multicols}{2}
+\begin{verbatim}
+#include <string.h>
+#include <stdio.h>
+
+%%{
+ machine foo;
+ main :=
+ ( 'foo' | 'bar' )
+ 0 @{ res = 1; };
+}%%
+
+%% write data;
+\end{verbatim}
+\verbspace
+\columnbreak
+\begin{verbatim}
+int main( int argc, char **argv )
+{
+ int cs, res = 0;
+ if ( argc > 1 ) {
+ char *p = argv[1];
+ char *pe = p + strlen(p) + 1;
+ %% write init;
+ %% write exec;
+ }
+ printf("result = %i\n", res );
+ return 0;
+}
+\end{verbatim}
+\verbspace
+\end{multicols}
+\caption{Parsing a command line argument.
+}
+\label{cmd-line-parsing}
+\end{figure}
+
+\subsection{Naming Ragel Blocks}
+
+\begin{verbatim}
+machine fsm_name;
+\end{verbatim}
+\verbspace
+
+The \verb|machine| statement gives the name of the FSM. If present in a
+specification, this statement must appear first. If a machine specification
+does not have a name then Ragel uses the previous specification name. If no
+previous specification name exists then this is an error. Because FSM
+specifications persist in memory, a machine's statements can be spread across
+multiple machine specifications. This allows one to break up a machine across
+several files or draw in statements that are common to multiple machines using
+the \verb|include| statement.
+
+\subsection{Machine Definition}
+\label{definition}
+
+\begin{verbatim}
+<name> = <expression>;
+\end{verbatim}
+\verbspace
+
+The machine definition statement associates an FSM expression with a name. Machine
+expressions assigned to names can later be referenced in other expressions. A
+definition statement on its own does not cause any states to be generated. It is simply a
+description of a machine to be used later. States are generated only when a definition is
+instantiated, which happens when a definition is referenced in an instantiated
+expression.
+
+\subsection{Machine Instantiation}
+\label{instantiation}
+
+\begin{verbatim}
+<name> := <expression>;
+\end{verbatim}
+\verbspace
+
+The machine instantiation statement generates a set of states representing an
+expression. Each instantiation generates a distinct set of states. The starting
+state of the instantiation is written in the data section of the generated code
+using the instantiation name. If a machine named
+\verb|main| is instantiated, its start state is used as the
+specification's start state and is assigned to the \verb|cs| variable by the
+\verb|write init| command. If no \verb|main| machine is given, the start state
+of the last machine instantiation to appear is used as the specification's
+start state.
+
+From outside the execution loop, control may be passed to any machine by
+assigning the entry point to the \verb|cs| variable. From inside the execution
+loop, control may be passed to any machine instantiation using \verb|fcall|,
+\verb|fgoto| or \verb|fnext| statements.
+
+\subsection{Including Ragel Code}
+
+\begin{verbatim}
+include FsmName "inputfile.rl";
+\end{verbatim}
+\verbspace
+
+The \verb|include| statement can be used to draw in the statements of another FSM
+specification. Both the name and input file are optional, however at least one
+must be given. Without an FSM name, the given input file is searched for an FSM
+of the same name as the current specification. Without an input file the
+current file is searched for a machine of the given name. If both are present,
+the given input file is searched for a machine of the given name.
+
+Ragel searches for included files from the location of the current file.
+Additional directories can be added to the search path using the \verb|-I|
+option.
+
+\subsection{Importing Definitions}
+\label{import}
+
+\begin{verbatim}
+import "inputfile.h";
+\end{verbatim}
+\verbspace
+
+The \verb|import| statement scrapes a file for sequences of tokens that match
+the following forms. Ragel treats these forms as state machine definitions.
+
+\noindent\hspace*{24pt}\verb|name '=' number|\\
+\noindent\hspace*{24pt}\verb|name '=' lit_string|\\
+\noindent\hspace*{24pt}\verb|'define' name number|\\
+\noindent\hspace*{24pt}\verb|'define' name lit_string|
+\vspace{12pt}
+
+If the input file is a Ragel program then tokens inside any Ragel
+specifications are ignored. See Section \ref{export} for a description of
+exporting machine definitions.
+
+Ragel searches for imported files from the location of the current file.
+Additional directories can be added to the search path using the \verb|-I|
+option.
+
+\section{Lexical Analysis of a Ragel Block}
+\label{lexing}
+
+Within a machine specification the following lexical rules apply to the input.
+
+\begin{itemize}
+
+\item The \verb|#| symbol begins a comment that terminates at the next newline.
+
+\item The symbols \verb|""|, \verb|''|, \verb|//|, \verb|[]| behave as the
+delimiters of literal strings. Within them, the following escape sequences
+are interpreted:
+
+\verb| \0 \a \b \t \n \v \f \r|
+
+A backslash at the end of a line joins the following line onto the current. A
+backslash preceding any other character removes special meaning. This applies
+to terminating characters and to special characters in regular expression
+literals. As an exception, regular expression literals do not support escape
+sequences as the operands of a range within a list. See the bullet on regular
+expressions in Section \ref{basic}.
+
+\item The symbols \verb|{}| delimit a block of host language code that will be
+embedded into the machine as an action. Within the block of host language
+code, basic lexical analysis of comments and strings is done in order to
+correctly find the closing brace of the block. With the exception of FSM
+commands embedded in code blocks, the entire block is preserved as is for
+identical reproduction in the output code.
+
+\item The pattern \verb|[+-]?[0-9]+| denotes an integer in decimal format.
+Integers used for specifying machines may be negative only if the alphabet type
+is signed. Integers used for specifying priorities may be positive or negative.
+
+\item The pattern \verb|0x[0-9A-Fa-f]+| denotes an integer in hexadecimal
+format.
+
+\item The keywords are \verb|access|, \verb|action|, \verb|alphtype|,
+\verb|getkey|, \verb|write|, \verb|machine| and \verb|include|.
+
+\item The pattern \verb|[a-zA-Z_][a-zA-Z_0-9]*| denotes an identifier.
+
+
+\item Any amount of whitespace may separate tokens.
+
+\end{itemize}
+
+
+\section{Basic Machines}
+\label{basic}
+
+The basic machines are the base operands of regular language expressions. They
+are the smallest unit to which machine construction and manipulation operators
+can be applied.
+
+\begin{itemize}
+
+\item \verb|'hello'| -- Concatenation Literal. Produces a machine that matches
+the sequence of characters in the quoted string. If there are 5 characters
+there will be 6 states chained together with the characters in the string. See
+Section \ref{lexing} for information on valid escape sequences.
+
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{bmconcat}
+\end{center}
+\graphspace
+
+It is possible
+to make a concatenation literal case-insensitive by appending an \verb|i| to
+the string, for example \verb|'cmd'i|.
+
+\item \verb|"hello"| -- Identical to the single quoted version.
+
+\item \verb|[hello]| -- Or Expression. Produces a union of characters. There
+will be two states with a transition for each unique character between the two states.
+The \verb|[]| delimiters behave like the quotes of a literal string. For example,
+\verb|[ \t]| means tab or space. The \verb|or| expression supports character ranges
+with the \verb|-| symbol as a separator. The meaning of the union can be negated
+using an initial \verb|^| character as in standard regular expressions.
+See Section \ref{lexing} for information on valid escape sequences
+in \verb|or| expressions.
+
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{bmor}
+\end{center}
+\graphspace
+
+\item \verb|''|, \verb|""|, and \verb|[]| -- Zero Length Machine. Produces a machine
+that matches the zero length string. Zero length machines have one state that is both
+a start state and a final state.
+
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{bmnull}
+\end{center}
+\graphspace
+
+% FIXME: More on the range of values here.
+\item \verb|42| -- Numerical Literal. Produces a two state machine with one
+transition on the given number. The number may be in decimal or hexadecimal
+format and should be in the range allowed by the alphabet type. The minimum and
+maximum values permitted are defined by the host machine that Ragel is compiled
+on. For example, numbers in a \verb|short| alphabet on an i386 machine should
+be in the range \verb|-32768| to \verb|32767|.
+
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{bmnum}
+\end{center}
+\graphspace
+
+\item \verb|/simple_regex/| -- Regular Expression. Regular expressions are
+parsed as a series of expressions that are concatenated together. Each
+concatenated expression
+may be a literal character, the ``any'' character specified by the \verb|.|
+symbol, or a union of characters specified by the \verb|[]| delimiters. If the
+first character of a union is \verb|^| then it matches any character not in the
+list. Within a union, a range of characters can be given by separating the first
+and last characters of the range with the \verb|-| symbol. Each
+concatenated machine may have repetition specified by following it with the
+\verb|*| symbol. The standard escape sequences described in Section
+\ref{lexing} are supported everywhere in regular expressions except as the
+operands of a range within a list. This notation also supports the \verb|i|
+trailing option. Use it to produce case-insensitive machines, as in \verb|/GET/i|.
+
+Ragel does not support very complex regular expressions because the desired
+results can always be achieved using the more general machine construction
+operators listed in Section \ref{machconst}. The following diagram shows the
+result of compiling \verb|/ab*[c-z].*[123]/|. \verb|DEF| represents the default
+transition, which is taken if no other transition can be taken.
+
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{bmregex}
+\end{center}
+\graphspace
+
+\item \verb|'a' .. 'z'| -- Range. Produces a machine that matches any
+characters in the specified range. Allowable upper and lower bounds of the
+range are concatenation literals of length one and numerical literals. For
+example, \verb|0x10..0x20|, \verb|0..63|, and \verb|'a'..'z'| are valid ranges.
+The bounds should be in the range allowed by the alphabet type.
+
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{bmrange}
+\end{center}
+\graphspace
+
+\item \verb|variable_name| -- Lookup the machine definition assigned to the
+variable name given and use an instance of it. See Section \ref{definition} for
+an important note on what it means to reference a variable name.
+
+\item \verb|builtin_machine| -- There are several built-in machines available
+for use. They are all two state machines for the purpose of matching common
+classes of characters. They are:
+
+\begin{itemize}
+
+\item \verb|any | -- Any character in the alphabet.
+
+\item \verb|ascii | -- Ascii characters. \verb|0..127|
+
+\item \verb|extend| -- Ascii extended characters. This is the range
+\verb|-128..127| for signed alphabets and the range \verb|0..255| for unsigned
+alphabets.
+
+\item \verb|alpha | -- Alphabetic characters. \verb|[A-Za-z]|
+
+\item \verb|digit | -- Digits. \verb|[0-9]|
+
+\item \verb|alnum | -- Alpha numerics. \verb|[0-9A-Za-z]|
+
+\item \verb|lower | -- Lowercase characters. \verb|[a-z]|
+
+\item \verb|upper | -- Uppercase characters. \verb|[A-Z]|
+
+\item \verb|xdigit| -- Hexadecimal digits. \verb|[0-9A-Fa-f]|
+
+\item \verb|cntrl | -- Control characters. \verb|0..31|
+
+\item \verb|graph | -- Graphical characters. \verb|[!-~]|
+
+\item \verb|print | -- Printable characters. \verb|[ -~]|
+
+\item \verb|punct | -- Punctuation. Graphical characters that are not alphanumerics.
+\verb|[!-/:-@[-`{-~]|
+
+\item \verb|space | -- Whitespace. \verb|[\t\v\f\n\r ]|
+
+\item \verb|zlen | -- Zero length string. \verb|""|
+
+\item \verb|empty | -- Empty set. Matches nothing. \verb|^any|
+
+\end{itemize}
+\end{itemize}
+
+\section{Operator Precedence}
+The following table shows operator precedence from lowest to highest. Operators
+in the same precedence group are evaluated from left to right.
+
+\begin{tabular}{|c|c|c|}
+\hline
+1&\verb| , |&Join\\
+\hline
+2&\verb/ | & - --/&Union, Intersection and Subtraction\\
+\hline
+3&\verb| . <: :> :>> |&Concatenation\\
+\hline
+4&\verb| : |&Label\\
+\hline
+5&\verb| -> |&Epsilon Transition\\
+\hline
+6&\verb| > @ $ % |&Transitions Actions and Priorities\\
+\hline
+6&\verb| >/ $/ %/ </ @/ <>/ |&EOF Actions\\
+\hline
+6&\verb| >! $! %! <! @! <>! |&Global Error Actions\\
+\hline
+6&\verb| >^ $^ %^ <^ @^ <>^ |&Local Error Actions\\
+\hline
+6&\verb| >~ $~ %~ <~ @~ <>~ |&To-State Actions\\
+\hline
+6&\verb| >* $* %* <* @* <>* |&From-State Action\\
+\hline
+7&\verb| * ** ? + {n} {,n} {n,} {n,m} |&Repetition\\
+\hline
+8&\verb| ! ^ |&Negation and Character-Level Negation\\
+\hline
+9&\verb| ( <expr> ) |&Grouping\\
+\hline
+\end{tabular}
+
+\section{Regular Language Operators}
+\label{machconst}
+
+When using Ragel it is helpful to have a sense of how it constructs machines.
+The determinization process can produce results that seem unusual to someone
+not familiar with the NFA to DFA conversion algorithm. In this section we
+describe Ragel's state machine operators. Though the operators are defined
+using epsilon transitions, it should be noted that this is for discussion only.
+The epsilon transitions described in this section do not persist, but are
+immediately removed by the determinization process which is executed at every
+operation. Ragel does not make use of any nondeterministic intermediate state
+machines.
+
+To create an epsilon transition between two states \verb|x| and \verb|y| is to
+copy all of the properties of \verb|y| into \verb|x|. This involves drawing in
+all of \verb|y|'s to-state actions, EOF actions, etc., in addition to its
+transitions. If \verb|x| and \verb|y| both have a transition out on the same
+character, then the transitions must be combined. During transition
+combination a new transition is made that goes to a new state that is the
+combination of both target states. The new combination state is created using
+the same epsilon transition method. The new state has an epsilon transition
+drawn to all the states that compose it. Since the creation of new epsilon
+transitions may be triggered every time an epsilon transition is drawn, the
+process of drawing epsilon transitions is repeated until there are no more
+epsilon transitions to be made.
+
+A very common error that is made when using Ragel is to make machines that do
+too much. That is, to create machines that have unintentional
+nondeterministic properties. This usually results from being unaware of the common strings
+between machines that are combined together using the regular language
+operators. This can involve never leaving a machine, causing its actions to be
+propagated through all the following states. Or it can involve an alternation
+where both branches are unintentionally taken simultaneously.
+
+This problem forces one to think hard about the language that needs to be
+matched. To guard against this kind of problem one must ensure that the machine
+specification is divided up using boundaries that do not allow ambiguities from
+one portion of the machine to the next. See Chapter
+\ref{controlling-nondeterminism} for more on this problem and how to solve it.
+
+The Graphviz tool is an immense help when debugging improperly compiled
+machines or otherwise learning how to use Ragel. Graphviz Dot files can be
+generated from Ragel programs using the \verb|-V| option. See Section
+\ref{visualization} for more information.
+
+
+\subsection{Union}
+
+\verb/expr | expr/
+
+The union operation produces a machine that matches any string in machine one
+or machine two. The operation first creates a new start state. Epsilon
+transitions are drawn from the new start state to the start states of both
+input machines. The resulting machine has a final state set equivalent to the
+union of the final state sets of both input machines. In this operation, there
+is the opportunity for nondeterminism among both branches. If there are
+strings, or prefixes of strings that are matched by both machines then the new
+machine will follow both parts of the alternation at once. The union operation is
+shown below.
+
+\graphspace
+\begin{center}
+\includegraphics[scale=1.0]{opor}
+\end{center}
+\graphspace
+
+The following example demonstrates the union of three machines representing
+common tokens.
+
+% GENERATE: exor
+% OPT: -p
+% %%{
+% machine exor;
+\begin{inline_code}
+\begin{verbatim}
+# Hex digits, decimal digits, or identifiers
+main := '0x' xdigit+ | digit+ | alpha alnum*;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exor}
+\end{center}
+\graphspace
+
+\subsection{Intersection}
+
+\verb|expr & expr|
+
+Intersection produces a machine that matches any
+string that is in both machine one and machine two. To achieve intersection, a
+union is performed on the two machines. After the result has been made
+deterministic, any final state that is not a combination of final states from
+both machines has its final state status revoked. To complete the operation,
+paths that do not lead to a final state are pruned from the machine. Therefore,
+if there are any such paths in either of the expressions they will be removed
+by the intersection operator. Intersection can be used to require that two
+independent patterns be simultaneously satisfied as in the following example.
+
+% GENERATE: exinter
+% OPT: -p
+% %%{
+% machine exinter;
+\begin{inline_code}
+\begin{verbatim}
+# Match lines four characters wide that contain
+# words separated by whitespace.
+main :=
+ /[^\n][^\n][^\n][^\n]\n/* &
+ (/[a-z][a-z]*/ | [ \n])**;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exinter}
+\end{center}
+\graphspace
+
+\subsection{Difference}
+
+\verb|expr - expr|
+
+The difference operation produces a machine that matches
+strings that are in machine one but are not in machine two. To achieve subtraction,
+a union is performed on the two machines. After the result has been made
+deterministic, any final state that came from machine two or is a combination
+of states involving a final state from machine two has its final state status
+revoked. As with intersection, the operation is completed by pruning any path
+that does not lead to a final state. The following example demonstrates the
+use of subtraction to exclude specific cases from a set.
+
+% GENERATE: exsubtr
+% OPT: -p
+% %%{
+% machine exsubtr;
+\begin{inline_code}
+\begin{verbatim}
+# Subtract keywords from identifiers.
+main := /[a-z][a-z]*/ - ( 'for' | 'int' );
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exsubtr}
+\end{center}
+\graphspace
+
+\subsection{Strong Difference}
+\label{strong_difference}
+
+\verb|expr -- expr|
+
+Strong difference produces a machine that matches any string of the first
+machine that does not have any string of the second machine as a substring. In
+the following example, strong subtraction is used to exclude \verb|CRLF| from
+a sequence. In the corresponding visualization, the label \verb|DEF| is short
+for default. The default transition is taken if no other transition can be
+taken.
+
+% GENERATE: exstrongsubtr
+% OPT: -p
+% %%{
+% machine exstrongsubtr;
+\begin{inline_code}
+\begin{verbatim}
+crlf = '\r\n';
+main := [a-z]+ ':' ( any* -- crlf ) crlf;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exstrongsubtr}
+\end{center}
+\graphspace
+
+This operator is equivalent to the following.
+
+\begin{verbatim}
+expr - ( any* expr any* )
+\end{verbatim}
+\verbspace
+
+\subsection{Concatenation}
+
+\verb|expr . expr|
+
+Concatenation produces a machine that matches all the strings in machine one followed by all
+the strings in machine two. Concatenation draws epsilon transitions from the
+final states of the first machine to the start state of the second machine. The
+final states of the first machine lose their final state status, unless the
+start state of the second machine is final as well.
+Concatenation is the default operator. Two machines next to each other with no
+operator between them results in concatenation.
+
+\graphspace
+\begin{center}
+\includegraphics[scale=1.0]{opconcat}
+\end{center}
+\graphspace
+
+The opportunity for nondeterministic behaviour results from the possibility of
+the final states of the first machine accepting a string that is also accepted
+by the start state of the second machine.
+The most common scenario in which this happens is the
+concatenation of a machine that repeats some pattern with a machine that gives
+a terminating string, but the repetition machine does not exclude the
+terminating string. The example in Section \ref{strong_difference}
+guards against this. Another example is the expression \verb|("'" any* "'")|.
+When executed the thread of control will
+never leave the \verb|any*| machine. This is a problem especially if actions
+are embedded to process the characters of the \verb|any*| component.
+
+In the following example, the first machine is always active due to the
+nondeterministic nature of concatenation. This particular nondeterminism is intended
+however because we wish to permit EOF strings before the end of the input.
+
+% GENERATE: exconcat
+% OPT: -p
+% %%{
+% machine exconcat;
+\begin{inline_code}
+\begin{verbatim}
+# Require an eof marker on the last line.
+main := /[^\n]*\n/* . 'EOF\n';
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exconcat}
+\end{center}
+\graphspace
+
+There is a language
+ambiguity involving concatenation and subtraction. Because concatenation is the
+default operator for two
+adjacent machines there is an ambiguity between subtraction of
+a positive numerical literal and concatenation of a negative numerical literal.
+For example, \verb|(x-7)| could be interpreted as \verb|(x . -7)| or
+\verb|(x - 7)|. In the Ragel language, the subtraction operator always takes precedence
+over concatenation of a negative literal. We adhere to the rule that the default
+concatenation operator takes effect only when there are no other operators between
+two machines. Beware of writing machines such as \verb|(any -1)| when what is
+desired is a concatenation of \verb|any| and \verb|-1|. Instead write
+\verb|(any . -1)| or \verb|(any (-1))|. If in doubt of the meaning of your program do not
+rely on the default concatenation operator; always use the \verb|.| symbol.
+
+
+\subsection{Kleene Star}
+
+\verb|expr*|
+
+The machine resulting from the Kleene Star operator will match zero or more
+repetitions of the machine it is applied to.
+It creates a new start state and an additional final
+state. Epsilon transitions are drawn between the new start state and the old start
+state, between the new start state and the new final state, and
+between the final states of the machine and the new start state. After the
+machine is made deterministic the effect is of the final states getting all the
+transitions of the start state.
+
+\graphspace
+\begin{center}
+\includegraphics[scale=1.0]{opstar}
+\end{center}
+\graphspace
+
+The possibility for nondeterministic behaviour arises if the final states have
+transitions on any of the same characters as the start state. This is common
+when applying kleene star to an alternation of tokens. Like the other problems
+arising from nondeterministic behaviour, this is discussed in more detail in Chapter
+\ref{controlling-nondeterminism}. This particular problem can also be solved
+by using the longest-match construction discussed in Section
+\ref{generating-scanners} on scanners.
+
+In this
+example, there is no nondeterminism introduced by the exterior kleene star due to
+the newline at the end of the regular expression. Without the newline the
+exterior kleene star would be redundant and there would be ambiguity between
+repeating the inner range of the regular expression and the entire regular
+expression. Though it would not cause a problem in this case, unnecessary
+nondeterminism in the kleene star operator often causes undesired results for
+new Ragel users and must be guarded against.
+
+% GENERATE: exstar
+% OPT: -p
+% %%{
+% machine exstar;
+\begin{inline_code}
+\begin{verbatim}
+# Match any number of lines with only lowercase letters.
+main := /[a-z]*\n/*;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exstar}
+\end{center}
+\graphspace
+
+\subsection{One Or More Repetition}
+
+\verb|expr+|
+
+This operator produces the concatenation of the machine with the kleene star of
+itself. The result will match one or more repetitions of the machine. The plus
+operator is equivalent to \verb|(expr . expr*)|.
+
+% GENERATE: explus
+% OPT: -p
+% %%{
+% machine explus;
+\begin{inline_code}
+\begin{verbatim}
+# Match alpha-numeric words.
+main := alnum+;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{explus}
+\end{center}
+\graphspace
+
+\subsection{Optional}
+
+\verb|expr?|
+
+The {\em optional} operator produces a machine that accepts the machine
+given or the zero length string. The optional operator is equivalent to
+\verb/(expr | '' )/. In the following example the optional operator is used to
+possibly extend a token.
+
+% GENERATE: exoption
+% OPT: -p
+% %%{
+% machine exoption;
+\begin{inline_code}
+\begin{verbatim}
+# Match integers or floats.
+main := digit+ ('.' digit+)?;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exoption}
+\end{center}
+\graphspace
+
+\subsection{Repetition}
+
+\noindent\hspace*{24pt}\verb|expr {n}| -- Exactly N copies of expr.\\
+\noindent\hspace*{24pt}\verb|expr {,n}| -- Zero to N copies of expr.\\
+\noindent\hspace*{24pt}\verb|expr {n,}| -- N or more copies of expr.\\
+\noindent\hspace*{24pt}\verb|expr {n,m}| -- N to M copies of expr.
+\vspace{12pt}
+
+\subsection{Negation}
+
+\verb|!expr|
+
+Negation produces a machine that matches any string not matched by the given
+machine. Negation is equivalent to \verb|(any* - expr)|.
+
+% GENERATE: exnegate
+% OPT: -p
+% %%{
+% machine exnegate;
+\begin{inline_code}
+\begin{verbatim}
+# Accept anything but a string beginning with a digit.
+main := ! ( digit any* );
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exnegate}
+\end{center}
+\graphspace
+
+\subsection{Character-Level Negation}
+
+\verb|^expr|
+
+Character-level negation produces a machine that matches any single character
+not matched by the given machine. Character-level negation is equivalent to
+\verb|(any - expr)|. It must be applied only to machines that match strings of
+length one.
+
+\section{State Machine Minimization}
+
+State machine minimization is the process of finding the minimal equivalent FSM accepting
+the language. Minimization reduces the number of states in machines
+by merging equivalent states. It does not change the behaviour of the machine
+in any way. It will cause some states to be merged into one because they are
+functionally equivalent. State minimization is on by default. It can be turned
+off with the \verb|-n| option.
+
+The algorithm implemented is similar to Hopcroft's state minimization
+algorithm. Hopcroft's algorithm assumes a finite alphabet that can be listed in
+memory, whereas Ragel supports arbitrary integer alphabets that cannot be
+listed in memory. Though exact analysis is very difficult, Ragel minimization
+runs close to O(n * log(n)) and requires O(n) temporary storage where
+$n$ is the number of states.
+
+\section{Visualization}
+\label{visualization}
+
+%In many cases, practical
+%parsing programs will be too large to completely visualize with Graphviz. The
+%proper approach is to reduce the language to the smallest subset possible that
+%still exhibits the characteristics that one wishes to learn about or to fix.
+%This can be done without modifying the source code using the \verb|-M| and
+%\verb|-S| options. If a machine cannot be easily reduced,
+%embeddings of unique actions can be very useful for tracing a
+%particular component of a larger machine specification, since action names are
+%written out on transition labels.
+
+Ragel is able to emit compiled state machines in Graphviz's Dot file format.
+This is done using the \verb|-V| option.
+Graphviz support allows users to perform
+incremental visualization of their parsers. User actions are displayed on
+transition labels of the graph.
+
+If the final graph is too large to be
+meaningful, or even drawn, the user is able to inspect portions of the parser
+by naming particular regular expression definitions with the \verb|-S| and
+\verb|-M| options to the \verb|ragel| program. Use of Graphviz greatly
+improves the Ragel programming experience. It allows users to learn Ragel by
+experimentation and also to track down bugs caused by unintended
+nondeterminism.
+
+Ragel has another option to help debugging. The \verb|-x| option causes Ragel
+to emit the compiled machine in an XML format.
+
+\chapter{User Actions}
+
+Ragel permits the user to embed actions into the transitions of a regular
+expression's corresponding state machine. These actions are executed when the
+generated code moves over a transition. Like the regular expression operators,
+the action embedding operators are fully compositional. They take a state
+machine and an action as input, embed the action and yield a new state machine
+that can be used in the construction of other machines. Due to the
+compositional nature of embeddings, the user has complete freedom in the
+placement of actions.
+
+A machine's transitions are categorized into four classes. The action embedding
+operators access the transitions defined by these classes. The {\em entering
+transition} operator \verb|>| isolates the start state, then embeds an action
+into all transitions leaving it. The {\em finishing transition} operator
+\verb|@| embeds an action into all transitions going into a final state. The
+{\em all transition} operator \verb|$| embeds an action into all transitions of
+an expression. The {\em leaving transition} operator \verb|%| provides access
+to the yet-unmade transitions moving out of the machine via the final states.
+
+\section{Embedding Actions}
+
+\begin{verbatim}
+action ActionName {
+ /* Code an action here. */
+ count += 1;
+}
+\end{verbatim}
+\verbspace
+
+The action statement defines a block of code that can be embedded into an FSM.
+Action names can be referenced by the action embedding operators in
+expressions. Though actions need not be named in this way (literal blocks
+of code can be embedded directly when building machines), defining reusable
+blocks of code whenever possible is good practice because it potentially increases the
+degree to which the machine can be minimized.
+
+Within an action some Ragel expressions and statements are parsed and
+translated. These allow the user to interact with the machine from action code.
+See Section \ref{vals} for a complete list of statements and values available
+in code blocks.
+
+\subsection{Entering Action}
+
+\verb|expr > action|
+
+The entering action operator embeds an action into all transitions
+that enter into the machine from the start state. If the start state is final,
+then the action is also embedded into the start state as a leaving action. This
+means that if a machine accepts the zero-length string and control passes
+through the start state then the entering action is executed. Note
+that this can happen on both a following character and on the EOF event.
+
+In some machines the start state has transitions coming in from within the
+machine. In these cases the start state is first isolated from the rest of the
+machine ensuring that the entering actions are executed once only.
+
+% GENERATE: exstact
+% OPT: -p
+% %%{
+% machine exstact;
+\begin{inline_code}
+\begin{verbatim}
+# Execute A at the beginning of a string of alpha.
+action A {}
+main := ( lower* >A ) . ' ';
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exstact}
+\end{center}
+\graphspace
+
+\subsection{Finishing Action}
+
+\verb|expr @ action|
+
+The finishing action operator embeds an action into any transitions that move
+the machine into a final state. Further input may move the machine out of the
+final state, but keep it in the machine. Therefore finishing actions may be
+executed more than once if a machine has any internal transitions out of a
+final state. In the following example the final state has no transitions out
+and the finishing action is executed only once.
+
+% GENERATE: exdoneact
+% OPT: -p
+% %%{
+% machine exdoneact;
+% action A {}
+\begin{inline_code}
+\begin{verbatim}
+# Execute A when the trailing space is seen.
+main := ( lower* ' ' ) @A;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exdoneact}
+\end{center}
+\graphspace
+
+\subsection{All Transition Action}
+
+\verb|expr $ action|
+
+The all transition operator embeds an action into all transitions of a machine.
+The action is executed whenever a transition of the machine is taken. In the
+following example, A is executed on every character matched.
+
+% GENERATE: exallact
+% OPT: -p
+% %%{
+% machine exallact;
+% action A {}
+\begin{inline_code}
+\begin{verbatim}
+# Execute A on any characters of the machine.
+main := ( 'm1' | 'm2' ) $A;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exallact}
+\end{center}
+\graphspace
+
+\subsection{Leaving Actions}
+\label{out-actions}
+
+\verb|expr % action|
+
+The leaving action operator queues an action for embedding into the transitions
+that go out of a machine via a final state. The action is first stored in
+the machine's final states and is later transferred to any transitions that are
+made going out of the machine by a kleene star or concatenation operation.
+
+If a final state of the machine is still final when compilation is complete
+then the leaving action is also embedded as an EOF action. Therefore, leaving
+the machine is defined as either leaving on a character or as state machine
+acceptance.
+
+This operator allows one to associate an action with the termination of a
+sequence, without being concerned about what particular character terminates
+the sequence. In the following example, A is executed when leaving the alpha
+machine on the newline character.
+
+% GENERATE: exoutact1
+% OPT: -p
+% %%{
+% machine exoutact1;
+% action A {}
+\begin{inline_code}
+\begin{verbatim}
+# Match a word followed by a newline. Execute A when
+# finishing the word.
+main := ( lower+ %A ) . '\n';
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exoutact1}
+\end{center}
+\graphspace
+
+In the following example, the \verb|term_word| action could be used to register
+the appearance of a word and to clear the buffer that the \verb|lower| action used
+to store the text of it.
+
+% GENERATE: exoutact2
+% OPT: -p
+% %%{
+% machine exoutact2;
+% action lower {}
+% action space {}
+% action term_word {}
+% action newline {}
+\begin{inline_code}
+\begin{verbatim}
+word = ( [a-z] @lower )+ %term_word;
+main := word ( ' ' @space word )* '\n' @newline;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exoutact2}
+\end{center}
+\graphspace
+
+In this final example of the action embedding operators, A is executed upon entering
+the alpha machine, B is executed on all transitions of the
+alpha machine, C is executed when the alpha machine is exited by moving into the
+newline machine and N is executed when the newline machine moves into a final
+state.
+
+% GENERATE: exaction
+% OPT: -p
+% %%{
+% machine exaction;
+% action A {}
+% action B {}
+% action C {}
+% action N {}
+\begin{inline_code}
+\begin{verbatim}
+# Execute A on starting the alpha machine, B on every transition
+# moving through it and C upon finishing. Execute N on the newline.
+main := ( lower* >A $B %C ) . '\n' @N;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{exaction}
+\end{center}
+\graphspace
+
+
+\section{State Action Embedding Operators}
+
+The state embedding operators allow one to embed actions into states. Like the
+transition embedding operators, there are several different classes of states
+that the operators access. The meanings of the symbols are similar to the
+meanings of the symbols used for the transition embedding operators. The design
+of the state selections was driven by a need to cover the states of an
+expression with exactly one error action.
+
+Unlike the transition embedding operators, the state embedding operators are
+also distinguished by the different kinds of events that embedded actions can
+be associated with. Therefore the state embedding operators have two
+components. The first, which is the first one or two characters, specifies the
+class of states that the action will be embedded into. The second component
+specifies the type of event the action will be executed on. The symbols of the
+second component also have equivalent keywords.
+
+\begin{multicols}{2}
+The different classes of states are:
+
+\noindent\hspace*{24pt}\verb|> | -- the start state\\
+\noindent\hspace*{24pt}\verb|< | -- any state except the start state\\
+\noindent\hspace*{24pt}\verb|$ | -- all states\\
+\noindent\hspace*{24pt}\verb|% | -- final states\\
+\noindent\hspace*{24pt}\verb|@ | -- any state except final states\\
+\noindent\hspace*{24pt}\verb|<>| -- any except start and final (middle)
+\vspace{12pt}
+
+\columnbreak
+
+The different kinds of embeddings are:
+
+\noindent\hspace*{24pt}\verb|~| -- to-state actions (\verb|to|)\\
+\noindent\hspace*{24pt}\verb|*| -- from-state actions (\verb|from|)\\
+\noindent\hspace*{24pt}\verb|/| -- EOF actions (\verb|eof|)\\
+\noindent\hspace*{24pt}\verb|!| -- error actions (\verb|err|)\\
+\noindent\hspace*{24pt}\verb|^| -- local error actions (\verb|lerr|)
+\vspace{12pt}
+
+\end{multicols}
+
+\subsection{To-State and From-State Actions}
+
+\subsubsection{To-State Actions}
+
+\noindent\hspace*{24pt}\verb|>~action >to(name) >to{...} | -- the start state\\
+\noindent\hspace*{24pt}\verb|<~action <to(name) <to{...} | -- any state except the start state\\
+\noindent\hspace*{24pt}\verb|$~action $to(name) $to{...} | -- all states\\
+\noindent\hspace*{24pt}\verb|%~action %to(name) %to{...} | -- final states\\
+\noindent\hspace*{24pt}\verb|@~action @to(name) @to{...} | -- any state except final states\\
+\noindent\hspace*{24pt}\verb|<>~action <>to(name) <>to{...}| -- any except start and final (middle)
+\vspace{12pt}
+
+
+To-state actions are executed whenever the state machine moves into the
+specified state, either by a natural movement over a transition or by an
+action-based transfer of control such as \verb|fgoto|. They are executed after the
+in-transition's actions but before the current character is advanced and
+tested against the end of the input block. To-state embeddings stay with the
+state. They are irrespective of the state's current set of transitions and any
+future transitions that may be added in or out of the state.
+
+Note that the setting of the current state variable \verb|cs| outside of the
+execute code is not considered by Ragel as moving into a state and consequently
+the to-state actions of the new current state are not executed. This includes
+the initialization of the current state when the machine begins. This is
+because the entry point into the machine execution code is after the execution
+of to-state actions.
+
+\subsubsection{From-State Actions}
+
+\noindent\hspace*{24pt}\verb|>*action >from(name) >from{...} | -- the start state\\
+\noindent\hspace*{24pt}\verb|<*action <from(name) <from{...} | -- any state except the start state\\
+\noindent\hspace*{24pt}\verb|$*action $from(name) $from{...} | -- all states\\
+\noindent\hspace*{24pt}\verb|%*action %from(name) %from{...} | -- final states\\
+\noindent\hspace*{24pt}\verb|@*action @from(name) @from{...} | -- any state except final states\\
+\noindent\hspace*{24pt}\verb|<>*action <>from(name) <>from{...}| -- any except start and final (middle)
+\vspace{12pt}
+
+From-state actions are executed whenever the state machine takes a transition from a
+state, either to itself or to some other state. These actions are executed
+immediately after the current character is tested against the input block end
+marker and before the transition to take is sought based on the current
+character. From-state actions are therefore executed even if a transition
+cannot be found and the machine moves into the error state. Like to-state
+embeddings, from-state embeddings stay with the state.
+
+\subsection{EOF Actions}
+
+\noindent\hspace*{24pt}\verb|>/action >eof(name) >eof{...} | -- the start state\\
+\noindent\hspace*{24pt}\verb|</action <eof(name) <eof{...} | -- any state except the start state\\
+\noindent\hspace*{24pt}\verb|$/action $eof(name) $eof{...} | -- all states\\
+\noindent\hspace*{24pt}\verb|%/action %eof(name) %eof{...} | -- final states\\
+\noindent\hspace*{24pt}\verb|@/action @eof(name) @eof{...} | -- any state except final states\\
+\noindent\hspace*{24pt}\verb|<>/action <>eof(name) <>eof{...}| -- any except start and final (middle)
+\vspace{12pt}
+
+The EOF action embedding operators enable the user to embed actions that are
+executed at the end of the input stream. EOF actions are stored in states and
+generated in the \verb|write exec| block. They are run when \verb|p == pe == eof|
+as the execute block is finishing. EOF actions are free to adjust \verb|p| and
+jump to another part of the machine to restart execution.
+
+\subsection{Handling Errors}
+
+In many applications it is useful to be able to react to parsing errors. The
+user may wish to print an error message that depends on the context. It
+may also be desirable to consume input in an attempt to return the input stream
+to some known state and resume parsing. To support error handling and recovery,
+Ragel provides error action embedding operators. There are two kinds of error
+actions: global error actions and local error actions.
+Error actions can be used to simply report errors, or by jumping to a machine
+instantiation that consumes input, can attempt to recover from errors.
+
+\subsubsection{Global Error Actions}
+
+\noindent\hspace*{24pt}\verb|>!action >err(name) >err{...} | -- the start state\\
+\noindent\hspace*{24pt}\verb|<!action <err(name) <err{...} | -- any state except the start state\\
+\noindent\hspace*{24pt}\verb|$!action $err(name) $err{...} | -- all states\\
+\noindent\hspace*{24pt}\verb|%!action %err(name) %err{...} | -- final states\\
+\noindent\hspace*{24pt}\verb|@!action @err(name) @err{...} | -- any state except final states\\
+\noindent\hspace*{24pt}\verb|<>!action <>err(name) <>err{...}| -- any except start and final (middle)
+\vspace{12pt}
+
+Global error actions are stored in the states they are embedded into until
+compilation is complete. They are then transferred to the transitions that move
+into the error state. These transitions are taken on all input characters that
+are not already covered by the state's transitions. If a state with an error
+action is not final when compilation is complete, then the action is also
+embedded as an EOF action.
+
+Error actions can be used to recover from errors by jumping back into the
+machine with \verb|fgoto| and optionally altering \verb|p|.
+
+\subsubsection{Local Error Actions}
+
+\noindent\hspace*{24pt}\verb|>^action >lerr(name) >lerr{...} | -- the start state\\
+\noindent\hspace*{24pt}\verb|<^action <lerr(name) <lerr{...} | -- any state except the start state\\
+\noindent\hspace*{24pt}\verb|$^action $lerr(name) $lerr{...} | -- all states\\
+\noindent\hspace*{24pt}\verb|%^action %lerr(name) %lerr{...} | -- final states\\
+\noindent\hspace*{24pt}\verb|@^action @lerr(name) @lerr{...} | -- any state except final states\\
+\noindent\hspace*{24pt}\verb|<>^action <>lerr(name) <>lerr{...}| -- any except start and final (middle)
+\vspace{12pt}
+
+Like global error actions, local error actions are also stored in the states
+they are embedded into until a transfer point. The transfer point is different
+however. Each local error action embedding is associated with a name. When a
+machine definition has been fully constructed, all local error action
+embeddings associated with the same name as the machine definition are
+transferred to the error transitions. At this time they are also embedded as
+EOF actions in the case of non-final states.
+
+Local error actions can be used to specify an action to take when a particular
+section of a larger state machine fails to match. A particular machine
+definition's ``thread'' may die and the local error actions executed, however
+the machine as a whole may continue to match input.
+
+There are two forms of local error action embeddings. In the first form the
+name defaults to the current machine. In the second form the machine name can
+be specified. This is useful when it is more convenient to specify the local
+error action in a sub-definition that is used to construct the machine
+definition that the local error action is associated with. To embed local
+error actions and
+explicitly state the machine definition on which the transfer is to happen use
+\verb|(name, action)| as the action.
+
+\subsubsection{Example}
+
+The following example uses error actions to report an error and jump to a
+machine that consumes the remainder of the line when parsing fails. After
+consuming the line, the error recovery machine returns to the main loop.
+
+% GENERATE: erract
+% %%{
+% machine erract;
+% ws = ' ';
+% address = 'foo AT bar..com';
+% date = 'Monday May 12';
+\begin{inline_code}
+\begin{verbatim}
+action cmd_err {
+ printf( "command error\n" );
+ fhold; fgoto line;
+}
+action from_err {
+ printf( "from error\n" );
+ fhold; fgoto line;
+}
+action to_err {
+ printf( "to error\n" );
+ fhold; fgoto line;
+}
+
+line := [^\n]* '\n' @{ fgoto main; };
+
+main := (
+ (
+ 'from' @err(cmd_err)
+ ( ws+ address ws+ date '\n' ) $err(from_err) |
+ 'to' @err(cmd_err)
+ ( ws+ address '\n' ) $err(to_err)
+ )
+)*;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% %% write data;
+% void f()
+% {
+% %% write init;
+% %% write exec;
+% }
+% END GENERATE
+
+
+
+\section{Action Ordering and Duplicates}
+
+When combining expressions that have embedded actions it is often the case that
+a number of actions must be executed on a single input character. For example,
+following a concatenation the leaving action of the left expression and the
+entering action of the right expression will be embedded into one transition.
+This requires a method of ordering actions that is intuitive and
+predictable for the user, and repeatable for the compiler.
+
+We associate with the embedding of each action a unique timestamp that is
+used to order actions that appear together on a single transition in the final
+state machine. To accomplish this we recursively traverse the parse tree of
+regular expressions and assign timestamps to action embeddings. References to
+machine definitions are followed in the traversal. When we visit a
+parse tree node we assign timestamps to all {\em entering} action embeddings,
+recurse on the parse tree, then assign timestamps to the remaining {\em all},
+{\em finishing}, and {\em leaving} embeddings in the order in which they
+appear.
+
+By default Ragel does not permit a single action to appear multiple times in an action
+list. When the final machine has been created, actions that appear more than
+once in a single transition, to-state, from-state or EOF action list have their
+duplicates removed.
+The first appearance of the action is preserved. This is useful in a number of
+scenarios. First, it allows us to union machines with common prefixes without
+worrying about the action embeddings in the prefix being duplicated. Second, it
+prevents leaving actions from being transferred multiple times. This can
+happen when a machine is repeated, then followed with another machine that
+begins with a common character. For example:
+
+\begin{verbatim}
+word = [a-z]+ %act;
+main := word ( '\n' word )* '\n\n';
+\end{verbatim}
+\verbspace
+
+Note that Ragel does not compare action bodies to determine if they have
+identical program text. It simply checks for duplicates using each action
+block's unique location in the program.
+
+The removal of duplicates can be turned off using the \verb|-d| option.
+
+\section{Values and Statements Available in Code Blocks}
+\label{vals}
+
+The following values are available in code blocks:
+
+\begin{itemize}
+\item \verb|fpc| -- A pointer to the current character. This is equivalent to
+accessing the \verb|p| variable.
+
+\item \verb|fc| -- The current character. This is equivalent to the expression \verb|(*p)|.
+
+\item \verb|fcurs| -- An integer value representing the current state. This
+value should only be read from. To move to a different place in the machine
+from action code use the \verb|fgoto|, \verb|fnext| or \verb|fcall| statements.
+Outside of the machine execution code the \verb|cs| variable may be modified.
+
+\item \verb|ftargs| -- An integer value representing the target state. This
+value should only be read from. Again, \verb|fgoto|, \verb|fnext| and
+\verb|fcall| can be used to move to a specific entry point.
+
+\item \verb|fentry(<label>)| -- Retrieve an integer value representing the
+entry point \verb|label|. The integer value returned will be a compile time
+constant. This number is suitable for later use in control flow transfer
+statements that take an expression. This value should not be compared against
+the current state because any given label can have multiple states representing
+it. The value returned by \verb|fentry| can be any one of the multiple states that
+it represents.
+\end{itemize}
+
+The following statements are available in code blocks:
+
+\begin{itemize}
+
+\item \verb|fhold;| -- Do not advance over the current character. If processing
+data in multiple buffer blocks, the \verb|fhold| statement should only be used
+once in the set of actions executed on a character. Multiple calls may result
+in backing up over the beginning of the buffer block. The \verb|fhold|
+statement does not imply any transfer of control. It is equivalent to the
+\verb|p--;| statement.
+
+\item \verb|fexec <expr>;| -- Set the next character to process. This can be
+used to backtrack to previous input or advance ahead.
+Unlike \verb|fhold|, which can be used
+anywhere, \verb|fexec| requires the user to ensure that the target of the
+backtrack is in the current buffer block or is known to be somewhere ahead of
+it. The machine will continue iterating forward until \verb|pe| is arrived at,
+\verb|fbreak| is called or the machine moves into the error state. In actions
+embedded into transitions, the \verb|fexec| statement is equivalent to setting
+\verb|p| to one position ahead of the next character to process. If the user
+also modifies \verb|pe|, it is possible to change the buffer block entirely.
+
+\item \verb|fgoto <label>;| -- Jump to an entry point defined by
+\verb|<label>|. The \verb|fgoto| statement immediately transfers control to
+the destination state.
+
+\item \verb|fgoto *<expr>;| -- Jump to an entry point given by \verb|<expr>|.
+The expression must evaluate to an integer value representing a state.
+
+\item \verb|fnext <label>;| -- Set the next state to be the entry point defined
+by \verb|label|. The \verb|fnext| statement does not immediately jump to the
+specified state. Any action code following the statement is executed.
+
+\item \verb|fnext *<expr>;| -- Set the next state to be the entry point given
+by \verb|<expr>|. The expression must evaluate to an integer value representing
+a state.
+
+\item \verb|fcall <label>;| -- Push the target state and jump to the entry
+point defined by \verb|<label>|. The next \verb|fret| will jump to the target
+of the transition on which the call was made. Use of \verb|fcall| requires
+the declaration of a call stack. An array of integers named \verb|stack| and a
+single integer named \verb|top| must be declared. With the \verb|fcall|
+construct, control is immediately transferred to the destination state.
+See section \ref{modularization} for more information.
+
+\item \verb|fcall *<expr>;| -- Push the current state and jump to the entry
+point given by \verb|<expr>|. The expression must evaluate to an integer value
+representing a state.
+
+\item \verb|fret;| -- Return to the target state of the transition on which the
+last \verb|fcall| was made. Use of \verb|fret| requires the declaration of a
+call stack. Control is immediately transferred to the destination state.
+
+\item \verb|fbreak;| -- Advance \verb|p|, save the target state to \verb|cs|
+and immediately break out of the execute loop. This statement is useful
+in conjunction with the \verb|noend| write option. Rather than process input
+until \verb|pe| is arrived at, the fbreak statement
+can be used to stop processing from an action. After an \verb|fbreak|
+statement the \verb|p| variable will point to the next character in the input. The
+current state will be the target of the current transition. Note that \verb|fbreak|
+causes the target state's to-state actions to be skipped.
+
+\end{itemize}
+
+Once actions with control-flow commands are embedded into a
+machine, the user must exercise caution when using the machine as the operand
+to other machine construction operators. If an action jumps to another state
+then unioning any transition that executes that action with another transition
+that follows some other path will cause that other path to be lost. Using
+commands that manually jump around a machine takes us out of the domain of
+regular languages because transitions that the
+machine construction operators are not aware of are introduced. These
+commands should therefore be used with caution.
+
+
+\chapter{Controlling Nondeterminism}
+\label{controlling-nondeterminism}
+
+Along with the flexibility of arbitrary action embeddings comes a need to
+control nondeterminism in regular expressions. If a regular expression is
+ambiguous, then sub-components of a parser other than the intended parts may become
+active. This means that actions that are irrelevant to the
+current subset of the parser may be executed, causing problems for the
+programmer.
+
+Tools that are based on regular expression engines and that are used for
+recognition tasks will usually function as intended regardless of the presence
+of ambiguities. It is quite common for users of scripting languages to write
+regular expressions that are heavily ambiguous and it generally does not
+matter. As long as one of the potential matches is recognized, there can be any
+number of other matches present. In some parsing systems the run-time engine
+can employ a strategy for resolving ambiguities, for example always pursuing
+the longest possible match and discarding others.
+
+In Ragel, there is no regular expression run-time engine, just a simple state
+machine execution model. When we begin to embed actions and face the
+possibility of spurious action execution, it becomes clear that controlling
+nondeterminism at the machine construction level is very important. Consider
+the following example.
+
+% GENERATE: lines1
+% OPT: -p
+% %%{
+% machine lines1;
+% action first {}
+% action tail {}
+% word = [a-z]+;
+\begin{inline_code}
+\begin{verbatim}
+ws = [\n\t ];
+line = word $first ( ws word $tail )* '\n';
+lines = line*;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% main := lines;
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.53]{lines1}
+\end{center}
+\graphspace
+
+Since the \verb|ws| expression includes the newline character, we will
+not finish the \verb|line| expression when a newline character is seen. We will
+simultaneously pursue the possibility of matching further words on the same
+line and the possibility of matching a second line. Evidence of this fact is
+in the state tables. On several transitions both the \verb|first| and
+\verb|tail| actions are executed. The solution here is simple: exclude
+the newline character from the \verb|ws| expression.
+
+% GENERATE: lines2
+% OPT: -p
+% %%{
+% machine lines2;
+% action first {}
+% action tail {}
+% word = [a-z]+;
+\begin{inline_code}
+\begin{verbatim}
+ws = [\t ];
+line = word $first ( ws word $tail )* '\n';
+lines = line*;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% main := lines;
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{lines2}
+\end{center}
+\graphspace
+
+Solving this kind of problem is straightforward when the ambiguity is created
+by strings that are a single character long. When the ambiguity is created by
+strings that are multiple characters long we have a more difficult problem.
+The following example is an incorrect attempt at a regular expression for C
+language comments.
+
+% GENERATE: comments1
+% OPT: -p
+% %%{
+% machine comments1;
+% action comm {}
+\begin{inline_code}
+\begin{verbatim}
+comment = '/*' ( any @comm )* '*/';
+main := comment ' ';
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{comments1}
+\end{center}
+\graphspace
+
+Using standard concatenation, we will never leave the \verb|any*| expression.
+We will forever entertain the possibility that a \verb|'*/'| string that we see
+is contained in a longer comment and that, simultaneously, the comment has
+ended. The concatenation of the \verb|comment| machine with \verb|SP| is done
+to show this. When we match space, we are also still matching the comment body.
+
+One way to approach the problem is to exclude the terminating string
+from the \verb|any*| expression using set difference. We must be careful to
+exclude not just the terminating string, but any string that contains it as a
+substring. A verbose, but proper specification of a C comment parser is given
+by the following regular expression.
+
+% GENERATE: comments2
+% OPT: -p
+% %%{
+% machine comments2;
+% action comm {}
+\begin{inline_code}
+\begin{verbatim}
+comment = '/*' ( ( any @comm )* - ( any* '*/' any* ) ) '*/';
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% main := comment;
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{comments2}
+\end{center}
+\graphspace
+
+Note that Ragel's strong subtraction operator \verb|--| can also be used here.
+In doing this subtraction we have phrased the problem of controlling non-determinism in
+terms of excluding strings common to two expressions that interact when
+combined.
+We can also phrase the problem in terms of the transitions of the state
+machines that implement these expressions. During the concatenation of
+\verb|any*| and \verb|'*/'| we will be making transitions that are composed of
+both the loop of the first expression and the final character of the second.
+At this time we want the transition on the \verb|'/'| character to take precedence
+over and disallow the transition that originated in the \verb|any*| loop.
+
+In another parsing problem, we wish to implement a lightweight tokenizer that we can
+utilize in the composition of a larger machine. For example, some HTTP headers
+have a token stream as a sub-language. The following example is an attempt
+at a regular expression-based tokenizer that does not function correctly due to
+unintended nondeterminism.
+
+% GENERATE: smallscanner
+% OPT: -p
+% %%{
+% machine smallscanner;
+% action start_str {}
+% action on_char {}
+% action finish_str {}
+\begin{inline_code}
+\begin{verbatim}
+header_contents = (
+ lower+ >start_str $on_char %finish_str |
+ ' '
+)*;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% main := header_contents;
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{smallscanner}
+\end{center}
+\graphspace
+
+In this case, the problem with using a standard kleene star operation is that
+there is an ambiguity between extending a token and wrapping around the machine
+to begin a new token. Using the standard operator, we get an undesirable
+nondeterministic behaviour. Evidence of this can be seen on the transition out
+of state one to itself. The transition extends the string, and simultaneously,
+finishes the string only to immediately begin a new one. What is required is
+for the
+transitions that represent an extension of a token to take precedence over the
+transitions that represent the beginning of a new token. For this problem
+there is no simple solution that uses standard regular expression operators.
+
+\section{Priorities}
+
+A priority mechanism was devised and built into the determinization
+process, specifically for the purpose of allowing the user to control
+nondeterminism. Priorities are integer values embedded into transitions. When
+the determinization process is combining transitions that have different
+priorities, the transition with the higher priority is preserved and the
+transition with the lower priority is dropped.
+
+Unfortunately, priorities can have unintended side effects because their
+operation requires that they linger in transitions indefinitely. They must linger
+because the Ragel program cannot know when the user is finished with a priority
+embedding. A solution whereby they are explicitly deleted after use is
+conceivable; however this is not very user-friendly. Priorities were therefore
+made into named entities. Only priorities with the same name are allowed to
+interact. This allows any number of priorities to coexist in one machine for
+the purpose of controlling various different regular expression operations and
+eliminates the need to ever delete them. Such a scheme allows the user to
+choose a unique name, embed two different priority values using that name
+and be confident that the priority embedding will be free of any side effects.
+
+In the first form of priority embedding the name defaults to the name of the machine
+definition that the priority is assigned in. In this sense priorities are by
+default local to the current machine definition or instantiation. Beware of
+using this form in a longest-match machine, since there is only one name for
+the entire set of longest match patterns. In the second form the priority's
+name can be specified, allowing priority interaction across machine definition
+boundaries.
+
+\begin{itemize}
+\item \verb|expr > int| -- Sets starting transitions to have priority int.
+\item \verb|expr @ int| -- Sets transitions that go into a final state to have priority int.
+\item \verb|expr $ int| -- Sets all transitions to have priority int.
+\item \verb|expr % int| -- Sets leaving transitions to
+have priority int. When a transition is made going out of the machine (either
+by concatenation or kleene star) its priority is immediately set to the
+leaving priority.
+\end{itemize}
+
+The second form of priority assignment allows the programmer to specify the name
+to which the priority is assigned.
+
+\begin{itemize}
+\item \verb|expr > (name, int)| -- Starting transitions.
+\item \verb|expr @ (name, int)| -- Finishing transitions (into a final state).
+\item \verb|expr $ (name, int)| -- All transitions.
+\item \verb|expr % (name, int)| -- Leaving transitions.
+\end{itemize}
+
+\section{Guarded Operators that Encapsulate Priorities}
+
+Priority embeddings are a very expressive mechanism. At the same time they
+can be very confusing for the user. They force the user to imagine
+the transitions inside two interacting expressions and work out the precise
+effects of the operations between them. When we consider
+that this problem is worsened by the
+potential for side effects caused by unintended priority name collisions, we
+see that exposing the user to priorities is undesirable.
+
+Fortunately, in practice the use of priorities has been necessary only in a
+small number of scenarios. This allows us to encapsulate their functionality
+into a small set of operators and fully hide them from the user. This is
+advantageous from a language design point of view because it greatly simplifies
+the design.
+
+Going back to the C comment example, we can now properly specify
+it using a guarded concatenation operator which we call {\em finish-guarded
+concatenation}. From the user's point of view, this operator terminates the
+first machine when the second machine moves into a final state. It chooses a
+unique name and uses it to embed a low priority into all
+transitions of the first machine. A higher priority is then embedded into the
+transitions of the second machine that enter into a final state. The following
+example yields a machine identical to the example in Section
+\ref{controlling-nondeterminism}.
+
+\begin{inline_code}
+\begin{verbatim}
+comment = '/*' ( any @comm )* :>> '*/';
+\end{verbatim}
+\end{inline_code}
+\verbspace
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{comments2}
+\end{center}
+\graphspace
+
+Another guarded operator is {\em left-guarded concatenation}, given by the
+\verb|<:| compound symbol. This operator places a higher priority on all
+transitions of the first machine. This is useful if one must forcibly separate
+two lists that contain common elements. For example, one may need to tokenize a
+stream, but first consume leading whitespace.
+
+Ragel also includes a {\em longest-match kleene star} operator, given by the
+\verb|**| compound symbol. This
+guarded operator embeds a high
+priority into all transitions of the machine.
+A lower priority is then embedded into the leaving transitions. When the
+kleene star operator makes the epsilon transitions from
+the final states into the new start state, the lower priority will be transferred
+to the epsilon transitions. In cases where following an epsilon transition
+out of a final state conflicts with an existing transition out of a final
+state, the epsilon transition will be dropped.
+
+Other guarded operators are conceivable, such as guards on union that cause one
+alternative to take precedence over another. These may be implemented when it
+is clear they constitute a frequently used operation.
+In the next section we discuss the explicit specification of state machines
+using state charts.
+
+\subsection{Entry-Guarded Concatenation}
+
+\verb|expr :> expr|
+
+This operator concatenates two machines, but first assigns a low
+priority to all transitions
+of the first machine and a high priority to the starting transitions of the
+second machine. This operator is useful if from the final states of the first
+machine it is possible to accept the characters in the entering transitions of
+the second machine. This operator effectively terminates the first machine
+immediately upon starting the second machine, where otherwise they would be
+pursued concurrently. In the following example, entry-guarded concatenation is
+used to move out of a machine that matches everything at the first sign of an
+end-of-input marker.
+
+% GENERATE: entryguard
+% OPT: -p
+% %%{
+% machine entryguard;
+\begin{inline_code}
+\begin{verbatim}
+# Leave the catch-all machine on the first character of FIN.
+main := any* :> 'FIN';
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{entryguard}
+\end{center}
+\graphspace
+
+Entry-guarded concatenation is equivalent to the following:
+
+\begin{verbatim}
+expr $(unique_name,0) . expr >(unique_name,1)
+\end{verbatim}
+\verbspace
+
+\subsection{Finish-Guarded Concatenation}
+
+\verb|expr :>> expr|
+
+This operator is
+like the previous operator, except the higher priority is placed on the final
+transitions of the second machine. This is useful if one wishes to entertain
+the possibility of continuing to match the first machine right up until the
+second machine enters a final state. In other words it terminates the first
+machine only when the second accepts. In the following example, finish-guarded
+concatenation causes the move out of the machine that matches everything to be
+delayed until the full end-of-input marker has been matched.
+
+% GENERATE: finguard
+% OPT: -p
+% %%{
+% machine finguard;
+\begin{inline_code}
+\begin{verbatim}
+# Leave the catch-all machine on the last character of FIN.
+main := any* :>> 'FIN';
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{finguard}
+\end{center}
+\graphspace
+
+Finish-guarded concatenation is equivalent to the following, with one
+exception. If the right machine's start state is final, the higher priority is
+also embedded into it as a leaving priority. This prevents the left machine
+from persisting via the zero-length string.
+
+\begin{verbatim}
+expr $(unique_name,0) . expr @(unique_name,1)
+\end{verbatim}
+\verbspace
+
+\subsection{Left-Guarded Concatenation}
+
+\verb|expr <: expr|
+
+This operator places
+a higher priority on the left expression. It is useful if you want to prefix a
+sequence with another sequence composed of some of the same characters. For
+example, one can consume leading whitespace before tokenizing a sequence of
+whitespace-separated words as in:
+
+% GENERATE: leftguard
+% OPT: -p
+% %%{
+% machine leftguard;
+% action alpha {}
+% action ws {}
+% action start {}
+% action fin {}
+\begin{inline_code}
+\begin{verbatim}
+main := ( ' '* >start %fin ) <: ( ' ' $ws | [a-z] $alpha )*;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{leftguard}
+\end{center}
+\graphspace
+
+Left-guarded concatenation is equivalent to the following:
+
+\begin{verbatim}
+expr $(unique_name,1) . expr >(unique_name,0)
+\end{verbatim}
+\verbspace
+
+\subsection{Longest-Match Kleene Star}
+\label{longest_match_kleene_star}
+
+\verb|expr**|
+
+This version of kleene star puts a higher priority on staying in the
+machine versus wrapping around and starting over. The LM kleene star is useful
+when writing simple tokenizers. These machines are built by applying the
+longest-match kleene star to an alternation of token patterns, as in the
+following.
+
+% GENERATE: lmkleene
+% OPT: -p
+% %%{
+% machine exfinpri;
+% action A {}
+% action B {}
+\begin{inline_code}
+\begin{verbatim}
+# Repeat tokens, but make sure to get the longest match.
+main := (
+ lower ( lower | digit )* %A |
+ digit+ %B |
+ ' '
+)**;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{lmkleene}
+\end{center}
+\graphspace
+
+If a regular kleene star were used the machine above would not be able to
+distinguish between extending a word and beginning a new one. This operator is
+equivalent to:
+
+\begin{verbatim}
+( expr $(unique_name,1) %(unique_name,0) )*
+\end{verbatim}
+\verbspace
+
+When the kleene star is applied, transitions that go out of the machine and
+back into it are made. These are assigned a priority of zero by the leaving
+transition mechanism. This is less than the priority of one assigned to the
+transitions leaving the final states but not leaving the machine. When
+these transitions clash on the same character, the
+transition that stays in the machine takes precedence. The transition
+that wraps around is dropped.
+
+Note that this operator does not build a scanner in the traditional sense
+because there is never any backtracking. To build a scanner with backtracking
+use the Longest-Match machine construction described in Section
+\ref{generating-scanners}.
+
+\chapter{Interface to Host Program}
+
+The Ragel code generator is very flexible. The generated code has no
+dependencies and can be inserted in any function, perhaps inside a loop if
+desired. The user is responsible for declaring and initializing a number of
+required variables, including the current state and the pointer to the input
+stream. These can live in any scope. Control of the input processing loop is
+also possible: the user may break out of the processing loop and return to it
+at any time.
+
+In the case of the C, D, and Go host languages, Ragel is able to generate very
+fast-running code that implements state machines as directly executable code.
+Since very large files strain the host language compiler, table-based code
+generation is also supported. In the future we hope to provide a partitioned,
+directly executable format that is able to reduce the burden on the host
+compiler by splitting large machines across multiple functions.
+
+In the case of Java and Ruby, table-based code generation is the only code
+style supported. In the future this may be expanded to include other code
+styles.
+
+Ragel can be used to parse input in one block, or it can be used to parse input
+in a sequence of blocks as it arrives from a file or socket. Parsing the input
+in a sequence of blocks brings with it a few responsibilities. If the parser
+utilizes a scanner, care must be taken to not break the input stream anywhere
+but token boundaries. If pointers to the input stream are taken during
+parsing, care must be taken to not use a pointer that has been invalidated by
+movement to a subsequent block. If the current input data pointer is moved
+backwards it must not be moved past the beginning of the current block.
+
+Figure \ref{basic-example} shows a simple Ragel program that does not have any
+actions. The example tests the first argument of the program against a number
+pattern and then prints the machine's acceptance status.
+
+\begin{figure}
+\small
+\begin{verbatim}
+#include <stdio.h>
+#include <string.h>
+%%{
+ machine foo;
+ write data;
+}%%
+int main( int argc, char **argv )
+{
+ int cs;
+ if ( argc > 1 ) {
+ char *p = argv[1];
+ char *pe = p + strlen( p );
+ %%{
+ main := [0-9]+ ( '.' [0-9]+ )?;
+
+ write init;
+ write exec;
+ }%%
+ }
+ printf("result = %i\n", cs >= foo_first_final );
+ return 0;
+}
+\end{verbatim}
+\verbspace
+\caption{A basic Ragel example without any actions.
+}
+\label{basic-example}
+\end{figure}
+
+\section{Variables Used by Ragel}
+
+There are a number of variables that Ragel expects the user to declare. At a
+very minimum the \verb|cs|, \verb|p| and \verb|pe| variables must be declared.
+In Go, Java and Ruby code the \verb|data| variable must also be declared. If
+EOF actions are used then the \verb|eof| variable is required. If
+stack-based state machine control flow statements are used then the
+\verb|stack| and \verb|top| variables are required. If a scanner is declared
+then the \verb|act|, \verb|ts| and \verb|te| variables must be
+declared.
+
+\begin{itemize}
+
+\item \verb|cs| - Current state. This must be an integer and it should persist
+across invocations of the machine when the data is broken into blocks that are
+processed independently. This variable may be modified from outside the
+execution loop, but not from within.
+
+\item \verb|p| - Data pointer. In C/D code this variable is expected to be a
+pointer to the character data to process. It should be initialized to the
+beginning of the data block on every run of the machine. In Go, Java and Ruby it is
+used as an offset to \verb|data| and must be an integer. In this case it should
+be initialized to zero on every run of the machine.
+
+\item \verb|pe| - Data end pointer. This should be initialized to \verb|p| plus
+the data length on every run of the machine. In Go, Java and Ruby code this should
+be initialized to the data length.
+
+\item \verb|eof| - End of file pointer. This should be set to \verb|pe| when
+the buffer block being processed is the last one, otherwise it should be set to
+null. In Go, Java and Ruby code \verb|-1| must be used instead of null. If the EOF
+event can be known only after the final buffer block has been processed, then
+it is possible to set \verb|p = pe = eof| and run the execute block.
+
+\item \verb|data| - This variable is only required in Go, Java and Ruby code. It
+must be an array containing the data to process.
+
+\item \verb|stack| - This must be an array of integers. It is used to store
+integer values representing states. If the stack must resize dynamically the
+Pre-Push and Post-Pop statements can be used to do this (Sections
+\ref{prepush} and \ref{postpop}).
+
+\item \verb|top| - This must be an integer value and will be used as an offset
+to \verb|stack|, giving the next available spot on the top of the stack.
+
+\item \verb|act| - This must be an integer value. It is a variable sometimes
+used by scanner code to keep track of the most recent successful pattern match.
+
+\item \verb|ts| - This must be a pointer to character data. In Go, Java and
+Ruby code this must be an integer. See Section \ref{generating-scanners} for
+more information.
+
+\item \verb|te| - Also a pointer to character data.
+
+\end{itemize}
+
+\section{Alphtype Statement}
+
+\begin{verbatim}
+alphtype unsigned int;
+\end{verbatim}
+\verbspace
+
+The alphtype statement specifies the alphabet data type that the machine
+operates on. During the compilation of the machine, integer literals are
+expected to be in the range of possible values of the alphtype. The default
+is \verb|char| for all languages except Go where the default is \verb|byte|.
+
+\begin{multicols}{2}
+C/C++/Objective-C:
+\begin{verbatim}
+ char unsigned char
+ short unsigned short
+ int unsigned int
+ long unsigned long
+\end{verbatim}
+\verbspace
+
+Go:
+\begin{verbatim}
+ byte
+ int8 uint8
+ int16 uint16
+ int32 uint32
+ int
+\end{verbatim}
+\verbspace
+
+Ruby:
+\begin{verbatim}
+ char
+ int
+\end{verbatim}
+\verbspace
+
+\columnbreak
+
+Java:
+\begin{verbatim}
+ char
+ byte
+ short
+ int
+\end{verbatim}
+\verbspace
+
+D:
+\begin{verbatim}
+ char
+ byte ubyte
+ short ushort
+ wchar
+ int uint
+ dchar
+\end{verbatim}
+\verbspace
+
+\end{multicols}
+
+\section{Getkey Statement}
+
+\begin{verbatim}
+getkey fpc->id;
+\end{verbatim}
+\verbspace
+
+This statement specifies to Ragel how to retrieve the current character from
+the pointer to the current element (\verb|p|). Any expression that returns
+a value of the alphabet type
+may be used. The getkey statement may be used for looking into element
+structures or for translating the character to process. The getkey expression
+defaults to \verb|(*p)|. In goto-driven machines the getkey expression may be
+evaluated more than once per element processed, therefore it should not incur a
+large cost nor preclude optimization.
+
+\section{Access Statement}
+
+\begin{verbatim}
+access fsm->;
+\end{verbatim}
+\verbspace
+
+The access statement specifies how the generated code should
+access the machine data that is persistent across processing buffer blocks.
+This applies to all variables except \verb|p|, \verb|pe| and \verb|eof|. This includes
+\verb|cs|, \verb|top|, \verb|stack|, \verb|ts|, \verb|te| and \verb|act|.
+The access statement is useful if a machine is to be encapsulated inside a
+structure in C code. It can be used to give the name of
+a pointer to the structure.
+
+\section{Variable Statement}
+
+\begin{verbatim}
+variable p fsm->p;
+\end{verbatim}
+\verbspace
+
+The variable statement specifies how to access a specific
+variable. All of the variables that are declared by the user and
+used by Ragel can be changed. This includes \verb|p|, \verb|pe|, \verb|eof|, \verb|cs|,
+\verb|top|, \verb|stack|, \verb|ts|, \verb|te| and \verb|act|.
+In Go, Ruby and Java code generation the \verb|data| variable can also be changed.
+
+\section{Pre-Push Statement}
+\label{prepush}
+
+\begin{verbatim}
+prepush {
+ /* stack growing code */
+}
+\end{verbatim}
+\verbspace
+
+The prepush statement allows the user to supply stack management code that is
+written out during the generation of fcall, immediately before the current
+state is pushed to the stack. This statement can be used to test the number of
+available spaces and dynamically grow the stack if necessary.
+
+\section{Post-Pop Statement}
+\label{postpop}
+
+\begin{verbatim}
+postpop {
+ /* stack shrinking code */
+}
+\end{verbatim}
+\verbspace
+
+The postpop statement allows the user to supply stack management code that is
+written out during the generation of fret, immediately after the next state is
+popped from the stack. This statement can be used to dynamically shrink the
+stack.
+
+\section{Write Statement}
+\label{write-statement}
+
+\begin{verbatim}
+write <component> [options];
+\end{verbatim}
+\verbspace
+
+The write statement is used to generate parts of the machine.
+There are seven
+components that can be generated by a write statement. These components make up the
+state machine's data, initialization code, execution code, and export definitions.
+A write statement may appear before a machine is fully defined.
+This allows one to write out the data first then later define the machine where
+it is used. An example of this is shown in Figure \ref{fbreak-example}.
+
+\subsection{Write Data}
+\begin{verbatim}
+write data [options];
+\end{verbatim}
+\verbspace
+
+The write data statement causes Ragel to emit the constant static data needed
+by the machine. In table-driven output styles (see Section \ref{genout}) this
+is a collection of arrays that represent the states and transitions of the
+machine. In goto-driven machines much less data is emitted. At the very
+minimum a start state \verb|name_start| is generated. All variables written
+out in machine data have both the \verb|static| and \verb|const| properties and
+are prefixed with the name of the machine and an
+underscore. The data can be placed inside a class, inside a function, or it can
+be defined as global data.
+
+Two variables are written that may be used to test the state of the machine
+after a buffer block has been processed. The \verb|name_error| variable gives
+the id of the state that the machine moves into when it cannot find a valid
+transition to take. The machine immediately breaks out of the processing loop when
+it finds itself in the error state. The error variable can be compared to the
+current state to determine if the machine has failed to parse the input. If the
+machine is complete, that is from every state there is a transition to a proper
+state on every possible character of the alphabet, then no error state is required
+and this variable will be set to -1.
+
+The \verb|name_first_final| variable stores the id of the first final state.
+All of the machine's states are sorted by their final state status before
+having their ids assigned. Checking if the machine has accepted its input can
+then be done by checking if the current state is greater-than or equal to the
+first final state.
+
+Data generation has several options:
+
+\noindent\hspace*{24pt}\verb|noerror | - Do not generate the integer variable that gives the id of the error state.\\
+\noindent\hspace*{24pt}\verb|nofinal | - Do not generate the integer variable that gives the id of the first final state.\\
+\noindent\hspace*{24pt}\verb|noprefix | - Do not prefix the variable names with the name of the machine.
+\vspace{12pt}
+
+\begin{figure}
+\small
+\begin{verbatim}
+#include <stdio.h>
+%% machine foo;
+%% write data;
+int main( int argc, char **argv )
+{
+ int cs, res = 0;
+ if ( argc > 1 ) {
+ char *p = argv[1];
+ %%{
+ main :=
+ [a-z]+
+ 0 @{ res = 1; fbreak; };
+ write init;
+ write exec noend;
+ }%%
+ }
+ printf("execute = %i\n", res );
+ return 0;
+}
+\end{verbatim}
+\verbspace
+\caption{Use of {\tt noend} write option and the {\tt fbreak} statement for
+processing a string.
+}
+\label{fbreak-example}
+\end{figure}
+
+\subsection{Write Start, First Final and Error}
+
+\begin{verbatim}
+write start;
+write first_final;
+write error;
+\end{verbatim}
+\verbspace
+
+These three write statements provide an alternative means of accessing the
+\verb|start|, \verb|first_final| and \verb|error| states. If there are many
+different machine specifications in one file it is easy to get the prefix for
+these wrong. This is especially true if the state machine boilerplate is
+frequently made by a copy-paste-edit process. These write statements allow the
+problem to be avoided. They can be used as follows:
+
+\begin{verbatim}
+/* Did parsing succeed? */
+if ( cs < %%{ write first_final; }%% ) {
+ result = ERR_PARSE_ERROR;
+ goto fail;
+}
+\end{verbatim}
+\verbspace
+
+\subsection{Write Init}
+\begin{verbatim}
+write init [options];
+\end{verbatim}
+\verbspace
+
+The write init statement causes Ragel to emit initialization code. This should
+be executed once before the machine is started. At a very minimum this sets the
+current state to the start state. If other variables are needed by the
+generated code, such as call stack variables or scanner management
+variables, they are also initialized here.
+
+The \verb|nocs| option to the write init statement will cause ragel to skip
+initialization of the cs variable. This is useful if the user wishes to use
+custom logic to decide which state the specification should start in.
+
+\subsection{Write Exec}
+\begin{verbatim}
+write exec [options];
+\end{verbatim}
+\verbspace
+
+The write exec statement causes Ragel to emit the state machine's execution code.
+Ragel expects several variables to be available to this code. At a very minimum, the
+generated code needs access to the current character position \verb|p|, the ending
+position \verb|pe| and the current state \verb|cs| (though \verb|pe|
+can be omitted using the \verb|noend| write option).
+The \verb|p| variable is the cursor that the execute code will
+use to traverse the input. The \verb|pe| variable should be set up to point to one
+position past the last valid character in the buffer.
+
+Other variables are needed when certain features are used. For example using
+the \verb|fcall| or \verb|fret| statements requires \verb|stack| and
+\verb|top| variables to be defined. If a longest-match construction is used,
+variables for managing backtracking are required.
+
+The write exec statement has one option. The \verb|noend| option tells Ragel
+to generate code that ignores the end position \verb|pe|. In this
+case the user must explicitly break out of the processing loop using
+\verb|fbreak|, otherwise the machine will continue to process characters until
+it moves into the error state. This option is useful if one wishes to process a
+null terminated string. Rather than traverse the string to discover the length
+before processing the input, the user can break out when the null character is
+seen. The example in Figure \ref{fbreak-example} shows the use of the
+\verb|noend| write option and the \verb|fbreak| statement for processing a string.
+
+\subsection{Write Exports}
+\label{export}
+
+\begin{verbatim}
+write exports;
+\end{verbatim}
+\verbspace
+
+The export feature can be used to export simple machine definitions. Machine definitions
+are marked for export using the \verb|export| keyword.
+
+\begin{verbatim}
+export machine_to_export = 0x44;
+\end{verbatim}
+\verbspace
+
+When the write exports statement is used these machines are
+written out in the generated code. Defines are used for C and constant integers
+are used for D, Java and Ruby. See Section \ref{import} for a description of the
+import statement.
+
+\section{Maintaining Pointers to Input Data}
+
+In the creation of any parser it is not uncommon to require the collection of
+the data being parsed. It is always possible to collect data into a growable
+buffer as the machine moves over it, however the copying of data is a somewhat
+wasteful use of processor cycles. The most efficient way to collect data from
+the parser is to set pointers into the input then later reference them. This
+poses a problem for uses of Ragel where the input data arrives in blocks, such
+as over a socket or from a file. If a pointer is set in one buffer block but
+must be used while parsing a following buffer block, some extra consideration
+to correctness must be made.
+
+The scanner constructions exhibit this problem, requiring the maintenance
+code described in Section \ref{generating-scanners}. If a longest-match
+construction has been used somewhere in the machine then it is possible to
+take advantage of the required prefix maintenance code in the driver program to
+ensure pointers to the input are always valid. If laying down a pointer one can
+set \verb|ts| at the same spot or ahead of it. When data is shifted in
+between loops the user must also shift the pointer. In this way it is possible
+to maintain pointers to the input that will always be consistent.
+
+\begin{figure}
+\small
+\begin{verbatim}
+ int have = 0;
+ while ( 1 ) {
+ char *p, *pe, *data = buf + have;
+ int len, space = BUFSIZE - have;
+
+ if ( space == 0 ) {
+ fprintf(stderr, "BUFFER OUT OF SPACE\n");
+ exit(1);
+ }
+
+ len = fread( data, 1, space, stdin );
+ if ( len == 0 )
+ break;
+
+ /* Find the last newline by searching backwards. */
+ p = buf;
+ pe = data + len - 1;
+ while ( *pe != '\n' && pe >= buf )
+ pe--;
+ pe += 1;
+
+ %% write exec;
+
+ /* How much is still in the buffer? */
+ have = data + len - pe;
+ if ( have > 0 )
+ memmove( buf, pe, have );
+
+ if ( len < space )
+ break;
+ }
+\end{verbatim}
+\verbspace
+\caption{An example of line-oriented processing.
+}
+\label{line-oriented}
+\end{figure}
+
+In general, there are two approaches for guaranteeing the consistency of
+pointers to input data. The first approach is the one just described;
+lay down a marker from an action,
+then later ensure that the data the marker points to is preserved ahead of
+the buffer on the next execute invocation. This approach is good because it
+allows the parser to decide on the pointer-use boundaries, which can be
+arbitrarily complex parsing conditions. A downside is that it requires any
+pointers that are set to be corrected in between execute invocations.
+
+The alternative is to find the pointer-use boundaries before invoking the execute
+routine, then pass in the data using these boundaries. For example, if the
+program must perform line-oriented processing, the user can scan backwards from
+the end of an input block that has just been read in and process only up to the
+first found newline. On the next input read, the new data is placed after the
+partially read line and processing continues from the beginning of the line.
+An example of line-oriented processing is given in Figure \ref{line-oriented}.
+
+\section{Specifying the Host Language}
+
+The \verb|ragel| program has a number of options for specifying the host
+language. The host-language options are:
+
+\begin{itemize}
+\item \verb|-C | for C/C++/Objective-C code (default)
+\item \verb|-D | for D code.
+\item \verb|-Z | for Go code.
+\item \verb|-J | for Java code.
+\item \verb|-R | for Ruby code.
+\item \verb|-A | for C\# code.
+\end{itemize}
+
+\section{Choosing a Generated Code Style}
+\label{genout}
+
+There are three styles of code output to choose from. Code style affects the
+size and speed of the compiled binary. Changing code style does not require any
+change to the Ragel program. There are two table-driven formats and a goto
+driven format.
+
+In addition to choosing a style to emit, there are various levels of action
+code reuse to choose from. The maximum reuse levels (\verb|-T0|, \verb|-F0|
+and \verb|-G0|) ensure that no FSM action code is ever duplicated by encoding
+each transition's action list as static data and iterating
+through the lists on every transition. This will normally result in a smaller
+binary. The less action reuse options (\verb|-T1|, \verb|-F1| and \verb|-G1|)
+will usually produce faster running code by expanding each transition's action
+list into a single block of code, eliminating the need to iterate through the
+lists. This duplicates action code instead of generating the logic necessary
+for reuse. Consequently the binary will be larger. However, this tradeoff applies to
+machines with moderate to dense action lists only. If a machine's transitions
+frequently have less than two actions then the less reuse options will actually
+produce both a smaller and a faster running binary due to less action sharing
+overhead. The best way to choose the appropriate code style for your
+application is to perform your own tests.
+
+The table-driven FSM represents the state machine as constant static data. There are
+tables of states, transitions, indices and actions. The current state is
+stored in a variable. The execution is simply a loop that looks up the current
+state, looks up the transition to take, executes any actions and moves to the
+target state. In general, the table-driven FSM can handle any machine, produces
+a smaller binary and requires a less expensive host language compile, but
+results in slower running code. Since the table-driven format is the most
+flexible it is the default code style.
+
+The flat table-driven machine is a table-based machine that is optimized for
+small alphabets. Where the regular table machine uses the current character as
+the key in a binary search for the transition to take, the flat table machine
+uses the current character as an index into an array of transitions. This is
+faster in general, however is only suitable if the span of possible characters
+is small.
+
+The goto-driven FSM represents the state machine using goto and switch
+statements. The execution is a flat code block where the transition to take is
+computed using switch statements and directly executable binary searches. In
+general, the goto FSM produces faster code but results in a larger binary and a
+more expensive host language compile.
+
+The goto-driven format has an additional action reuse level (\verb|-G2|) that
+writes actions directly into the state transitioning logic rather than putting
+all the actions together into a single switch. Generally this produces faster
+running code because it allows the machine to encode the current state using
+the processor's instruction pointer. Again, sparse machines may actually
+compile to smaller binaries when \verb|-G2| is used due to less state and
+action management overhead. For many parsing applications \verb|-G2| is the
+preferred output format.
+
+\begin{center}
+
+Code Output Style Options
+
+\begin{tabular}{|c|c|c|}
+\hline
+\verb|-T0|&binary search table-driven&C/D/Java/Ruby/C\#\\
+\hline
+\verb|-T1|&binary search, expanded actions&C/D/Ruby/C\#\\
+\hline
+\verb|-F0|&flat table-driven&C/D/Ruby/C\#\\
+\hline
+\verb|-F1|&flat table, expanded actions&C/D/Ruby/C\#\\
+\hline
+\verb|-G0|&goto-driven&C/D/C\#\\
+\hline
+\verb|-G1|&goto, expanded actions&C/D/C\#\\
+\hline
+\verb|-G2|&goto, in-place actions&C/D/Go\\
+\hline
+\end{tabular}
+\end{center}
+
+\chapter{Beyond the Basic Model}
+
+\section{Parser Modularization}
+\label{modularization}
+
+It is possible to use Ragel's machine construction and action embedding
+operators to specify an entire parser using a single regular expression. In
+many cases this is the desired way to specify a parser in Ragel. However, in
+some scenarios the language to parse may be so large that it is difficult to
+think about it as a single regular expression. It may also shift between distinct
+parsing strategies, in which case modularization into several coherent blocks
+of the language may be appropriate.
+
+It may also be the case that patterns that compile to a large number of states
+must be used in a number of different contexts and referencing them in each
+context results in a very large state machine. In this case, an ability to reuse
+parsers would reduce code size.
+
+To address this, distinct regular expressions may be instantiated and linked
+together by means of a jumping and calling mechanism. This mechanism is
+analogous to the jumping to and calling of processor instructions. A jump
+command, given in action code, causes control to be immediately passed to
+another portion of the machine by way of setting the current state variable. A
+call command causes the target state of the current transition to be pushed to
+a state stack before control is transferred. Later on, the original location
+may be returned to with a return statement. In the following example, distinct
+state machines are used to handle the parsing of two types of headers.
+
+% GENERATE: call
+% %%{
+% machine call;
+\begin{inline_code}
+\begin{verbatim}
+action return { fret; }
+action call_date { fcall date; }
+action call_name { fcall name; }
+
+# A parser for date strings.
+date := [0-9][0-9] '/'
+ [0-9][0-9] '/'
+ [0-9][0-9][0-9][0-9] '\n' @return;
+
+# A parser for name strings.
+name := ( [a-zA-Z]+ | ' ' )** '\n' @return;
+
+# The main parser.
+headers =
+ ( 'from' | 'to' ) ':' @call_name |
+ ( 'departed' | 'arrived' ) ':' @call_date;
+
+main := headers*;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% %% write data;
+% void f()
+% {
+% %% write init;
+% %% write exec;
+% }
+% END GENERATE
+
+Calling and jumping should be used carefully as they are operations that take
+one out of the domain of regular languages. A machine that contains a call or
+jump statement in one of its actions should be used as an argument to a machine
+construction operator only with considerable care. Since DFA transitions may
+actually represent several NFA transitions, a call or jump embedded in one
+machine can inadvertently terminate another machine that it shares prefixes
+with. Despite this danger, these statements have proven useful for tying
+together sub-parsers of a language into a parser for the full language,
+especially for the purpose of modularizing code and reducing the number of
+states when the machine contains frequently recurring patterns.
+
+Section \ref{vals} describes the jump and call statements that are used to
+transfer control. These statements make use of two variables that must be
+declared by the user, \verb|stack| and \verb|top|. The \verb|stack| variable
+must be an array of integers and \verb|top| must be a single integer, which
+will point to the next available space in \verb|stack|. Sections \ref{prepush}
+and \ref{postpop} describe the Pre-Push and Post-Pop statements which can be
+used to implement a dynamically resizable array.
+
+\section{Referencing Names}
+\label{labels}
+
+This section describes how to reference names in epsilon transitions (Section
+\ref{state-charts}) and
+action-based control-flow statements such as \verb|fgoto|. There is a hierarchy
+of names implied in a Ragel specification. At the top level are the machine
+instantiations. Beneath the instantiations are labels and references to machine
+definitions. Beneath those are more labels and references to definitions, and
+so on.
+
+Any name reference may contain multiple components separated with the \verb|::|
+compound symbol. The search for the first component of a name reference is
+rooted at the join expression that the epsilon transition or action embedding
+is contained in. If the name reference is not contained in a join,
+the search is rooted at the machine definition that the epsilon transition or
+action embedding is contained in. Each component after the first is searched
+for beginning at the location in the name tree that the previous reference
+component refers to.
+
+In the case of action-based references, if the action is embedded more than
+once, the local search is performed for each embedding and the result is the
+union of all the searches. If no result is found for action-based references then
+the search is repeated at the root of the name tree. Any action-based name
+search may be forced into a strictly global search by prefixing the name
+reference with \verb|::|.
+
+The final component of the name reference must resolve to a unique entry point.
+If a name is unique in the entire name tree it can be referenced as is. If it
+is not unique it can be specified by qualifying it with names above it in the
+name tree. However, it can always be renamed.
+
+% FIXME: Should fit this in somewhere.
+% Some kinds of name references are illegal. Cannot call into longest-match
+% machine, can only call its start state. Cannot make a call to anywhere from
+% any part of a longest-match machine except a rule's action. This would result
+% in an eventual return to some point inside a longest-match other than the
+% start state. This is banned for the same reason a call into the LM machine is
+% banned.
+
+
+\section{Scanners}
+\label{generating-scanners}
+
+Scanners are very much intertwined with regular-languages and their
+corresponding processors. For this reason Ragel supports the definition of
+scanners. The generated code will repeatedly attempt to match patterns from a
+list, favouring longer patterns over shorter patterns. In the case of
+equal-length matches, the generated code will favour patterns that appear ahead
+of others. When a scanner makes a match it executes the user code associated
+with the match, consumes the input then resumes scanning.
+
+\begin{verbatim}
+<machine_name> := |*
+ pattern1 => action1;
+ pattern2 => action2;
+ ...
+ *|;
+\end{verbatim}
+\verbspace
+
+On the surface, Ragel scanners are similar to those defined by Lex. Though
+there is a key distinguishing feature: patterns may be arbitrary Ragel
+expressions and can therefore contain embedded code. With a Ragel-based scanner
+the user need not wait until the end of a pattern before user code can be
+executed.
+
+Scanners can be used to process sub-languages, as well as for tokenizing
+programming languages. In the following example a scanner is used to tokenize
+the contents of a header field.
+
+\begin{inline_code}
+\begin{verbatim}
+word = [a-z]+;
+head_name = 'Header';
+
+header := |*
+ word;
+ ' ';
+ '\n' => { fret; };
+*|;
+
+main := ( head_name ':' @{ fcall header; } )*;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+
+The scanner construction has a purpose similar to the longest-match kleene star
+operator \verb|**|. The key
+difference is that a scanner is able to backtrack to match a previously matched
+shorter string when the pursuit of a longer string fails. For this reason the
+scanner construction operator is not a pure state machine construction
+operator. It relies on several variables that enable it to backtrack and make
+pointers to the matched input text available to the user. For this reason
+scanners must be immediately instantiated. They cannot be defined inline or
+referenced by another expression. Scanners must be jumped to or called.
+
+Scanners rely on the \verb|ts|, \verb|te| and \verb|act|
+variables to be present so that they can backtrack and make pointers to the
+matched text available to the user. If input is processed using multiple calls
+to the execute code then the user must ensure that when a token is only
+partially matched that the prefix is preserved on the subsequent invocation of
+the execute code.
+
+The \verb|ts| variable must be defined as a pointer to the input data.
+It is used for recording where the current token match begins. This variable
+may be used in action code for retrieving the text of the current match. Ragel
+ensures that in between tokens and outside of the longest-match machines that
+this pointer is set to null. In between calls to the execute code the user must
+check if \verb|ts| is set and if so, ensure that the data it points to is
+preserved ahead of the next buffer block. This is described in more detail
+below.
+
+The \verb|te| variable must also be defined as a pointer to the input data.
+It is used for recording where a match ends and where scanning of the next
+token should begin. This can also be used in action code for retrieving the
+text of the current match.
+
+The \verb|act| variable must be defined as an integer type. It is used for
+recording the identity of the last pattern matched when the scanner must go
+past a matched pattern in an attempt to make a longer match. If the longer
+match fails it may need to consult the \verb|act| variable. In some cases, use
+of the \verb|act|
+variable can be avoided because the value of the current state is enough
+information to determine which token to accept, however in other cases this is
+not enough and so the \verb|act| variable is used.
+
+When the longest-match operator is in use, the user's driver code must take on
+some buffer management functions. The following algorithm gives an overview of
+the steps that should be taken to properly use the longest-match operator.
+
+\begin{itemize}
+\item Read a block of input data.
+\item Run the execute code.
+\item If \verb|ts| is set, the execute code will expect the incomplete
+token to be preserved ahead of the buffer on the next invocation of the execute
+code.
+\begin{itemize}
+\item Shift the data beginning at \verb|ts| and ending at \verb|pe| to the
+beginning of the input buffer.
+\item Reset \verb|ts| to the beginning of the buffer.
+\item Shift \verb|te| by the distance from the old value of \verb|ts|
+to the new value. The \verb|te| variable may or may not be valid. There is
+no way to know if it holds a meaningful value because it is not kept at null
+when it is not in use. It can be shifted regardless.
+\end{itemize}
+\item Read another block of data into the buffer, immediately following any
+preserved data.
+\item Run the scanner on the new data.
+\end{itemize}
+
+Figure \ref{preserve_example} shows the required handling of an input stream in
+which a token is broken by the input block boundaries. After processing up to
+and including the ``t'' of ``characters'', the prefix of the string token must be
+retained and processing should resume at the ``e'' on the next iteration of
+the execute code.
+
+If one uses a large input buffer for collecting input then the number of times
+the shifting must be done will be small. Furthermore, if one takes care not to
+define tokens that are allowed to be very long and instead processes these
+items using pure state machines or sub-scanners, then only a small amount of
+data will ever need to be shifted.
+
+\begin{figure}
+\small
+\begin{verbatim}
+ a) A stream "of characters" to be scanned.
+ | | |
+ p ts pe
+
+ b) "of characters" to be scanned.
+ | | |
+ ts p pe
+\end{verbatim}
+\verbspace
+\caption{Following an invocation of the execute code there may be a partially
+matched token (a). The data of the partially matched token
+must be preserved ahead of the new data on the next invocation (b).
+}
+\label{preserve_example}
+\end{figure}
+
+Since scanners attempt to make the longest possible match of input, patterns
+such as identifiers require one character of lookahead in order to trigger a
+match. In the case of the last token in the input stream the user must ensure
+that the \verb|eof| variable is set so that the final token is flushed out.
+
+An example scanner processing loop is given in Figure \ref{scanner-loop}.
+
+\begin{figure}
+\small
+\begin{verbatim}
+ int have = 0;
+ bool done = false;
+ while ( !done ) {
+ /* How much space is in the buffer? */
+ int space = BUFSIZE - have;
+ if ( space == 0 ) {
+ /* Buffer is full. */
+ cerr << "TOKEN TOO BIG" << endl;
+ exit(1);
+ }
+
+ /* Read in a block after any data we already have. */
+ char *p = inbuf + have;
+ cin.read( p, space );
+ int len = cin.gcount();
+
+ char *pe = p + len;
+ char *eof = 0;
+
+ /* If no data was read indicate EOF. */
+ if ( len == 0 ) {
+ eof = pe;
+ done = true;
+ }
+
+ %% write exec;
+
+ if ( cs == Scanner_error ) {
+ /* Machine failed before finding a token. */
+ cerr << "PARSE ERROR" << endl;
+ exit(1);
+ }
+
+ if ( ts == 0 )
+ have = 0;
+ else {
+ /* There is a prefix to preserve, shift it over. */
+ have = pe - ts;
+ memmove( inbuf, ts, have );
+ te = inbuf + (te-ts);
+ ts = inbuf;
+ }
+ }
+\end{verbatim}
+\verbspace
+\caption{A processing loop for a scanner.
+}
+\label{scanner-loop}
+\end{figure}
+
+\section{State Charts}
+\label{state-charts}
+
+In addition to supporting the construction of state machines using regular
+languages, Ragel provides a way to manually specify state machines using
+state charts. The comma operator combines machines together without any
+implied transitions. The user can then manually link machines by specifying
+epsilon transitions with the \verb|->| operator. Epsilon transitions are drawn
+between the final states of a machine and entry points defined by labels. This
+makes it possible to build machines using the explicit state-chart method while
+making minimal changes to the Ragel language.
+
+An interesting feature of Ragel's state chart construction method is that it
+can be mixed freely with regular expression constructions. A state chart may be
+referenced from within a regular expression, or a regular expression may be
+used in the definition of a state chart transition.
+
+\subsection{Join}
+
+\verb|expr , expr , ...|
+
+Join a list of machines together without
+drawing any transitions, without setting up a start state, and without
+designating any final states. Transitions between the machines may be specified
+using labels and epsilon transitions. The start state must be explicitly
+specified with the ``start'' label. Final states may be specified with an
+epsilon transition to the implicitly created ``final'' state. The join
+operation allows one to build machines using a state chart model.
+
+\subsection{Label}
+
+\verb|label: expr|
+
+Attaches a label to an expression. Labels can be
+used as the target of epsilon transitions and explicit control transfer
+statements such as \verb|fgoto| and \verb|fnext| in action
+code.
+
+\subsection{Epsilon}
+
+\verb|expr -> label|
+
+Draws an epsilon transition to the state defined
+by \verb|label|. Epsilon transitions are made deterministic when join
+operators are evaluated. Epsilon transitions that are not in a join operation
+are made deterministic when the machine definition that contains the epsilon is
+complete. See Section \ref{labels} for information on referencing labels.
+
+\subsection{Simplifying State Charts}
+
+There are two benefits to providing state charts in Ragel. The first is that it
+allows us to take a state chart with a full listing of states and transitions
+and simplify it in selective places using regular expressions.
+
+The state chart method of specifying parsers is very common. It is an
+effective programming technique for producing robust code. The key disadvantage
+becomes clear when one attempts to comprehend a large parser specified in this
+way. These programs usually require many lines, causing logic to be spread out
+over large distances in the source file. Remembering the function of a large
+number of states can be difficult and organizing the parser in a sensible way
+requires discipline because branches and repetition present many file layout
+options. This kind of programming takes a specification with inherent
+structure such as looping, alternation and concatenation and expresses it in a
+flat form.
+
+If we could take an isolated component of a manually programmed state chart,
+that is, a subset of states that has only one entry point, and implement it
+using regular language operators then we could eliminate all the explicit
+naming of the states contained in it. By eliminating explicitly named states
+and replacing them with higher-level specifications we simplify a state machine
+specification.
+
+For example, sometimes chains of states are needed, with only a small number of
+possible characters appearing along the chain. These can easily be replaced
+with a concatenation of characters. Sometimes a group of common states
+implement a loop back to another single portion of the machine. Rather than
+manually duplicate all the transitions that loop back, we may be able to
+express the loop using a Kleene star operator.
+
+Ragel allows one to take this state map simplification approach. We can build
+state machines using a state map model and implement portions of the state map
+using regular languages. In place of any transition in the state machine,
+entire sub-machines can be given. These can encapsulate functionality
+defined elsewhere. An important aspect of the Ragel approach is that when we
+wrap up a collection of states using a regular expression we do not lose
+access to the states and transitions. We can still execute code on the
+transitions that we have encapsulated.
+
+\subsection{Dropping Down One Level of Abstraction}
+\label{down}
+
+The second benefit of incorporating state charts into Ragel is that it permits
+us to bypass the regular language abstraction if we need to. Ragel's action
+embedding operators are sometimes insufficient for expressing certain parsing
+tasks. In the same way that it is useful for C language programmers to drop down
+to assembly language programming using embedded assembler, it is sometimes
+useful for the Ragel programmer to drop down to programming with state charts.
+
+In the following example, we wish to buffer the characters of an XML CDATA
+sequence. The sequence is terminated by the string \verb|]]>|. The challenge
+in our application is that we do not wish the terminating characters to be
+buffered. An expression of the form \verb|any* @buffer :>> ']]>'| will not work
+because the buffer will always contain the characters \verb|]]| on the end.
+Instead, what we need is to delay the buffering of \verb|]|
+characters until a time when we
+abandon the terminating sequence and go back into the main loop. There is no
+easy way to express this using Ragel's regular expression and action embedding
+operators, and so an ability to drop down to the state chart method is useful.
+
+% GENERATE: dropdown
+% OPT: -p
+% %%{
+% machine dropdown;
+\begin{inline_code}
+\begin{verbatim}
+action bchar { buff( fpc ); } # Buffer the current character.
+action bbrack1 { buff( "]" ); }
+action bbrack2 { buff( "]]" ); }
+
+CDATA_body =
+start: (
+ ']' -> one |
+ (any-']') @bchar ->start
+),
+one: (
+ ']' -> two |
+ [^\]] @bbrack1 @bchar ->start
+),
+two: (
+ '>' -> final |
+ ']' @bbrack1 -> two |
+ [^>\]] @bbrack2 @bchar ->start
+);
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% main := CDATA_body;
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{dropdown}
+\end{center}
+\graphspace
+
+
+\section{Semantic Conditions}
+\label{semantic}
+
+Many communication protocols contain variable-length fields, where the length
+of the field is given ahead of the field as a value. This
+problem cannot be expressed using regular languages because of its
+context-dependent nature. The prevalence of variable-length fields in
+communication protocols motivated us to introduce semantic conditions into
+the Ragel language.
+
+A semantic condition is a block of user code that is interpreted as an
+expression and evaluated immediately
+before a transition is taken. If the code returns a value of true, the
+transition may be taken. We can now embed code that extracts the length of a
+field, then proceed to match $n$ data values.
+
+% GENERATE: conds1
+% OPT: -p
+% %%{
+% machine conds1;
+% number = digit+;
+\begin{inline_code}
+\begin{verbatim}
+action rec_num { i = 0; n = getnumber(); }
+action test_len { i++ < n }
+data_fields = (
+ 'd'
+ [0-9]+ %rec_num
+ ':'
+ ( [a-z] when test_len )*
+)**;
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% main := data_fields;
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{conds1}
+\end{center}
+\graphspace
+
+The Ragel implementation of semantic conditions does not force us to give up the
+compositional property of Ragel definitions. For example, a machine that tests
+the length of a field using conditions can be unioned with another machine
+that accepts some of the same strings, without the two machines interfering with
+one another. The user need not be concerned about whether or not the result of the
+semantic condition will affect the matching of the second machine.
+
+To see this, first consider that when a user associates a condition with an
+existing transition, the transition's label is translated from the base character
+to its corresponding value in the space that represents ``condition $c$ true''. Should
+the determinization process combine a state that has a conditional transition
+with another state that has a transition on the same input character but
+without a condition, then the condition-less transition first has its label
+translated into two values, one to its corresponding value in the space that
+represents ``condition $c$ true'' and another to its corresponding value in the
+space that represents ``condition $c$ false''. It
+is then safe to combine the two transitions. This is shown in the following
+example. Two intersecting patterns are unioned, one with a condition and one
+without. The condition embedded in the first pattern does not affect the second
+pattern.
+
+% GENERATE: conds2
+% OPT: -p
+% %%{
+% machine conds2;
+% number = digit+;
+\begin{inline_code}
+\begin{verbatim}
+action test_len { i++ < n }
+action one { /* accept pattern one */ }
+action two { /* accept pattern two */ }
+patterns =
+ ( [a-z] when test_len )+ %one |
+ [a-z][a-z0-9]* %two;
+main := patterns '\n';
+\end{verbatim}
+\end{inline_code}
+\verbspace
+% }%%
+% END GENERATE
+
+\graphspace
+\begin{center}
+\includegraphics[scale=0.55]{conds2}
+\end{center}
+\graphspace
+
+There are many more potential uses for semantic conditions. The user is free to
+use arbitrary code and may therefore perform actions such as looking up names
+in dictionaries, validating input using external parsing mechanisms or
+performing checks on the semantic structure of input seen so far. In the next
+section we describe how Ragel accommodates several common parser engineering
+problems.
+
+The semantic condition feature works only with alphabet types that are smaller
+in width than the \verb|long| type. To implement semantic conditions Ragel
+needs to be able to allocate characters from the alphabet space. Ragel uses
+these allocated characters to express ``character C with condition P true'' or ``C
+with P false.'' Since internally Ragel uses longs to store characters there is
+no room left in the alphabet space unless an alphabet type smaller than long is
+used.
+
+\section{Implementing Lookahead}
+
+There are a few strategies for implementing lookahead in Ragel programs.
+Leaving actions, which are described in Section \ref{out-actions}, can be
+used as a form of lookahead. Ragel also provides the \verb|fhold| directive
+which can be used in actions to prevent the machine from advancing over the
+current character. It is also possible to manually adjust the current character
+position by shifting it backwards using \verb|fexec|, however when this is
+done, care must be taken not to overstep the beginning of the current buffer
+block. In both the use of \verb|fhold| and \verb|fexec| the user must be
+cautious of combining the resulting machine with another in such a way that the
+transition on which the current position is adjusted is not combined with a
+transition from the other machine.
+
+\section{Parsing Recursive Language Structures}
+
+In general Ragel cannot handle recursive structures because the grammar is
+interpreted as a regular language. However, depending on what needs to be
+parsed it is sometimes practical to implement the recursive parts using manual
+coding techniques. This often works in cases where the recursive structures are
+simple and easy to recognize, such as in the balancing of parentheses
+
+One approach to parsing recursive structures is to use actions that increment
+and decrement counters or otherwise recognize the entry to and exit from
+recursive structures and then jump to the appropriate machine definition using
+\verb|fcall| and \verb|fret|. Alternatively, semantic conditions can be used to
+test counter variables.
+
+A more traditional approach is to call a separate parsing function (expressed
+in the host language) when a recursive structure is entered, then later return
+when the end is recognized.
+
+\end{document}
diff --git a/test/rhsref1.lm b/test/rhsref1.lm
new file mode 100644
index 0000000..c905d2a
--- /dev/null
+++ b/test/rhsref1.lm
@@ -0,0 +1,117 @@
+##### LM #####
+lex
+ literal `var `if `then `else `while `do `for `read `write
+ `end `to `goto
+ literal `:= `!= `; `+ `- `* `/ `= `( `) `:
+
+ ignore /'//' [^\n]* '\n'/
+ ignore /[\n\t ]+/
+ token id /[a-zA-Z_]+/
+ token integernumber /[0-9]+/
+ token stringlit /'"' [^"]* '"'/
+end
+
+def program
+ [statement*]
+
+def statement
+ [declaration]
+| [assignment_statement]
+| [if_statement]
+| [while_statement]
+| [do_statement]
+| [for_statement]
+| [read_statement]
+| [write_statement]
+| [labelled_statement]
+| [goto_statement]
+
+def declaration
+ [`var id `;]
+
+def assignment_statement
+ [id `:= expression `;]
+
+def if_statement
+ [`if expression `then statement* opt_else_statement `end]
+
+def opt_else_statement
+ [`else statement*]
+| []
+
+def while_statement
+ [`while expression `do statement* `end]
+
+def do_statement
+ [`do statement* `while expression `;]
+
+def for_statement
+ [`for id `:= expression `to expression `do statement* `end]
+
+def read_statement
+ [`read id `;]
+
+def write_statement
+ [`write expression `;]
+
+def expression
+ [Term: term]
+| [expression eqop Term: term]
+
+def eqop [`=] | [`!=]
+
+def term
+ [Factor: factor]
+| [term addop Factor: factor]
+
+def addop [`+] | [`-]
+
+def factor
+ [Primary: primary]
+| [factor mulop Primary: primary]
+
+def mulop [`*] | [`/]
+
+def primary
+ [id]
+| [lit]
+| [`( expression `)]
+
+def lit
+ [integernumber]
+| [stringlit]
+
+def labelled_statement
+ [id `: statement]
+
+def goto_statement
+ [`goto id `;]
+
+parse P: program[stdin]
+
+for E: expression in P {
+ print( ^(E.Term.Factor.Primary) '\n' )
+}
+
+##### IN #####
+
+var a;
+a := 1;
+
+head:
+
+a := a + 1;
+c := d;
+
+if a = 10 then
+ goto head;
+end
+
+hi := there;
+##### EXP #####
+1
+1
+d
+10
+a
+there
diff --git a/test/rubyhere.lm b/test/rubyhere.lm
new file mode 100644
index 0000000..836b18c
--- /dev/null
+++ b/test/rubyhere.lm
@@ -0,0 +1,123 @@
+##### LM #####
+context rubyhere
+ rl ident_pattern /[a-zA-Z_][a-zA-Z_0-9]*/
+ rl number_pattern /[0-9]+/
+
+ lex
+ ignore /[ \t\n]+/
+ token id /ident_pattern/
+ token number /number_pattern/
+ literal `<< `* `, `( `) `!
+ end
+
+ HereId: str
+
+ token rest_of_line /[^\n]*'\n'/
+
+ lex
+ ignore /[ \t\n]+/
+ token here_id
+ HereData: here_data
+ /ident_pattern/
+ {
+ # Take the text of the here_id from the input stream.
+ HereId = input.pull( match_length )
+
+ # Get the data up to the rest of the line.
+ parse_stop ROL: rest_of_line(ctx)[ input ]
+
+ # Parse the heredoc data.
+ parse_stop HereData: here_data(ctx)[ input ]
+
+ # Push the rest-of-line data back to the input stream.
+ input.push( $ROL )
+
+ # Send the here_id token. Attach the heredoc data as an attribute.
+ input.push( make_token( typeid<here_id> HereId HereData ) )
+ }
+ end
+
+ lex
+ token here_close_id
+ / ident_pattern '\n' /
+ {
+ if match_text == HereId + '\n' {
+ input.push( make_token(
+ typeid<here_close_id>
+ input.pull( match_length ) ) )
+ }
+ else
+ input.push( make_token( typeid<here_line> input.pull(match_length) ) )
+ }
+
+ token here_line
+ / [^\n]* '\n' /
+ end
+
+ def here_data
+ [here_line* here_close_id]
+
+ def heredoc
+ [`<< here_id]
+
+ def primary
+ [id]
+ | [number]
+ | [heredoc]
+
+ def arglist
+ [primary arglist_more*]
+
+ def arglist_more
+ [`, primary]
+
+ def call
+ [id `( arglist? `)]
+
+ def statement
+ [primary]
+ | [call]
+
+ token foobar /any+/
+
+ def item
+ [statement `!]
+ | [foobar]
+
+ def start
+ [item*]
+end # rubyhere
+
+cons RubyHere: rubyhere[]
+
+parse S: rubyhere::start(RubyHere)[ stdin ]
+
+print_xml(S)
+print('\n')
+##### IN #####
+print( <<DATA1, more, <<DATA2, 99 )
+"&^#(@ almost
+!arbitrary text!
+DATA1
+hello
+world
+DATA2
+!
+print( <<DATA1, more, <<DATA2, 99 )
+"&^#(@ almost
+!arbitrary text!
+DATA1
+hello
+world
+DATA2
+# error here
+##### EXP #####
+<rubyhere::start><rubyhere::_repeat_item><rubyhere::item><rubyhere::statement><rubyhere::call><rubyhere::id>print</rubyhere::id><rubyhere::_literal_000d>(</rubyhere::_literal_000d><rubyhere::_opt_arglist><rubyhere::arglist><rubyhere::primary><rubyhere::heredoc><rubyhere::_literal_0007>&lt;&lt;</rubyhere::_literal_0007><rubyhere::here_id>DATA1</rubyhere::here_id></rubyhere::heredoc></rubyhere::primary><rubyhere::_repeat_arglist_more><rubyhere::arglist_more><rubyhere::_literal_000b>,</rubyhere::_literal_000b><rubyhere::primary><rubyhere::id>more</rubyhere::id></rubyhere::primary></rubyhere::arglist_more><rubyhere::arglist_more><rubyhere::_literal_000b>,</rubyhere::_literal_000b><rubyhere::primary><rubyhere::heredoc><rubyhere::_literal_0007>&lt;&lt;</rubyhere::_literal_0007><rubyhere::here_id>DATA2</rubyhere::here_id></rubyhere::heredoc></rubyhere::primary></rubyhere::arglist_more><rubyhere::arglist_more><rubyhere::_literal_000b>,</rubyhere::_literal_000b><rubyhere::primary><rubyhere::number>99</rubyhere::number></rubyhere::primary></rubyhere::arglist_more></rubyhere::_repeat_arglist_more></rubyhere::arglist></rubyhere::_opt_arglist><rubyhere::_literal_000f>)</rubyhere::_literal_000f></rubyhere::call></rubyhere::statement><rubyhere::_literal_0011>!</rubyhere::_literal_0011></rubyhere::item><rubyhere::item><rubyhere::foobar>print( &lt;&lt;DATA1, more, &lt;&lt;DATA2, 99 )
+"&amp;^#(@ almost
+!arbitrary text!
+DATA1
+hello
+world
+DATA2
+# error here
+</rubyhere::foobar></rubyhere::item></rubyhere::_repeat_item></rubyhere::start>
diff --git a/test/runtests.sh b/test/runtests.sh
new file mode 100755
index 0000000..bed1401
--- /dev/null
+++ b/test/runtests.sh
@@ -0,0 +1,244 @@
+#!/bin/bash
+#
+
+# Test cases contain sections giving the program, input and expected output.
+
+###### LM #####
+#
+# colm program
+#
+###### ARGS #####
+#
+# program arguments
+#
+###### IN #####
+#
+# program input
+#
+###### EXP #####
+#
+# expected output
+#
+###### EXIT ######
+#
+# expected exit value
+#
+
+#######################################
+
+WORKING=working
+COLM=../src/colm
+ERRORS=0
+
+cd `dirname $0`
+test -d $WORKING || mkdir $WORKING
+
+function die()
+{
+ echo
+ echo "$@"
+ echo
+ exit 1
+}
+
+function sig_exit()
+{
+ echo
+ exit 1;
+}
+
+# Parse args.
+while getopts vdm opt; do
+ case $opt in
+ v)
+ verbose=true;
+ ;;
+ d)
+ diff=true;
+ ;;
+ m)
+ VALGRIND="valgrind --leak-check=full --show-reachable=yes "
+ ;;
+ esac
+done
+shift $(($OPTIND - 1))
+
+# The files to process. If none given then glob all functions and pcap test confs.
+if [ $# != 0 ]; then
+ TEST_PAT="$*"
+else
+ TEST_PAT='*.lm'
+fi
+
+function cat_section
+{
+ local section=$1
+ local nth=$2
+ local in=$3
+
+ # Print Nth instance of the section
+ awk -vsection=$section -vnth=$nth '
+ /#+ *[a-zA-Z]+ *#+/ {
+ gsub( "[ #\n]", "", $0 );
+ in_section = 0
+ if ( $0 == section ) {
+ if ( n == nth ) {
+ in_section = 1;
+ found = 1;
+ }
+ n += 1
+ }
+ next;
+ }
+
+ in_section {
+ print $0;
+ }
+
+ END {
+ exit( found ? 0 : 1 )
+ }
+ ' $in | awk '
+ /--noeol$/ {
+ gsub(/--noeol$/,"");
+ printf("%s", $0);
+ next;
+ }
+ { print $0 }
+ '
+ return ${PIPESTATUS[0]};
+}
+
+function section
+{
+ local section=$1
+ local nth=$2
+ local in=$3
+ local out=$4
+
+ cat_section $section $nth $in > $out
+
+ # Remove the file if no section was found
+ [ $? = 0 ] || rm $out
+}
+
+function runtests()
+{
+ for TST in $TEST_PAT; do
+ ROOT=${TST/.lm}
+ LM=$WORKING/$ROOT.lm
+ ARGS=$WORKING/$ROOT.args
+ IN=$WORKING/$ROOT.in
+ EXP=$WORKING/$ROOT.exp
+
+ section LM 0 $TST $LM
+
+ BIN=$WORKING/$ROOT
+ OUT=$WORKING/$ROOT.out
+ DIFF=$WORKING/$ROOT.diff
+ LOG=$WORKING/$ROOT.log
+
+ if [ '!' -f $LM ]; then
+ echo "ERROR: $TST cannot be run: no LM section"
+ ERRORS=$(( ERRORS + 1 ))
+ continue
+ fi
+
+ # Compilation.
+ $COLM $LM &> $LOG
+ if [ $? != 0 ]; then
+ echo "ERROR: $TST cannot be run: compilation error"
+ ERRORS=$(( ERRORS + 1 ))
+ continue
+ fi
+
+ Nth=0
+ while true; do
+ section EXP $Nth $TST $EXP
+
+ # Stop when we have no Nth expected output.
+ if [ '!' -f $EXP ]; then
+ break;
+ fi
+
+ section ARGS $Nth $TST $ARGS
+ section IN $Nth $TST $IN
+ EXIT=`cat_section EXIT $Nth $TST`
+ if [ -z "$EXIT" ]; then
+ EXIT=0
+ fi
+
+ cmdargs=""
+ if [ -f $ARGS ]; then
+ cmdargs=`cat $ARGS`
+ fi
+
+ echo -n "running test $TST ($Nth)... "
+
+ if [ "$verbose" = true ]; then
+ echo
+ echo $COLM $TST
+ fi
+
+ if [ '!' -f $IN ] && [ -f $ROOT.in ]; then
+ IN=$ROOT.in;
+ fi
+
+ if [ "$verbose" = true ]; then
+ if [ -f $IN ]; then
+ echo "${VALGRIND}./$BIN $cmdargs < $IN > $OUT 2>> $LOG"
+ else
+ echo "${VALGRIND}./$BIN $cmdargs > $OUT 2>>$LOG"
+ fi
+ fi
+
+ # Execution
+ if [ -f $IN ]; then
+ ${VALGRIND}./$BIN $cmdargs < $IN > $OUT 2>> $LOG
+ else
+ ${VALGRIND}./$BIN $cmdargs > $OUT 2>>$LOG
+ fi
+
+ e=$?
+ if [ $e != "$EXIT" ]; then
+ echo "FAILED: exit value error: got: $e expected: $EXIT"
+ ERRORS=$(( ERRORS + 1 ))
+ Nth=$((Nth + 1))
+ continue
+ fi
+
+
+ # Diff of output
+ diff -u $EXP $OUT > $DIFF
+ if [ $? != 0 ]; then
+ echo "FAILED: output differs from expected output"
+ ERRORS=$(( ERRORS + 1 ))
+ Nth=$((Nth + 1))
+ if [ "$diff" = true ]; then
+ echo
+ cat $DIFF
+ echo
+ fi
+ continue
+ fi
+
+ echo ok
+ Nth=$((Nth + 1))
+ done
+ done
+
+ if [ $ERRORS != 0 ]; then
+ [ $ERRORS != 1 ] && plural="s";
+ echo
+ echo "TESTING FAILED: $ERRORS failure$plural"
+ echo
+ EXIT=1
+ fi
+}
+
+[ -d $WORKING ] || mkdir $WORKING
+
+runtests;
+
+exit $(( ERRORS != 0 ));
+
diff --git a/test/scope1.lm b/test/scope1.lm
new file mode 100644
index 0000000..e0886d3
--- /dev/null
+++ b/test/scope1.lm
@@ -0,0 +1,36 @@
+##### LM #####
+int f()
+{
+ i: int = 0
+ j: int = 100
+
+ while i < 4 {
+ j: int = 200
+ if ( i < 1 ) {
+ j: int = 300
+ print( "i: [$i] j: [$j]\n" )
+ }
+ elsif ( i < 2 ) {
+ j: int = 300
+ print( "i: [$i] j: [$j]\n" )
+ }
+ elsif ( i < 3 ) {
+ print( "i: [$i] j: [$j]\n" )
+ }
+ else {
+ print( "i: [$i] j: [$j]\n" )
+ }
+
+ i = i + 1
+ }
+
+ print( "j: [$j]\n" )
+}
+
+f()
+##### EXP #####
+i: 0 j: 300
+i: 1 j: 300
+i: 2 j: 200
+i: 3 j: 200
+j: 100
diff --git a/test/sprintf.lm b/test/sprintf.lm
new file mode 100644
index 0000000..b2a65fa
--- /dev/null
+++ b/test/sprintf.lm
@@ -0,0 +1,4 @@
+##### LM #####
+print( sprintf( "%08x\n" (256 + 11 * 16) ) )
+##### EXP #####
+000001b0
diff --git a/test/string.lm b/test/string.lm
new file mode 100644
index 0000000..ea41cb2
--- /dev/null
+++ b/test/string.lm
@@ -0,0 +1,60 @@
+##### LM #####
+lex
+ token str_escape /'\\' any/
+ token str_chr /[^\\"]+/
+end
+
+def str_item
+ [str_escape]
+| [str_chr]
+
+def string
+ [`" str_item* `"]
+
+lex
+ token ident /[a-zA-Z_]+/
+ token number /[0-9]+/
+
+ literal `+ `* `; `" `' `( `)
+ literal `+= `-= `*=
+
+ ignore wp /[ \t\n]+/
+end
+
+def expr
+ [expr `+ term]
+| [term]
+
+def term
+ [term `* primary]
+| [primary]
+
+def primary
+ [number]
+| [ident]
+| [string]
+| [`( expr `)]
+
+def expr_list
+ [expr_list expr `;]
+| []
+
+def start
+ [expr_list]
+ {
+ if match lhs
+ ~a + "%{{"; 1 * 2;
+ {
+ print( 'yes\n' )
+ }
+ }
+
+parse S: start[stdin]
+print_xml( S )
+print( '\n' )
+##### IN #####
+a + "%{{"; 1 * 2;
+
+##### EXP #####
+yes
+<start><expr_list><expr_list><expr_list></expr_list><expr><expr><term><primary><ident>a</ident></primary></term></expr><_literal_0009>+</_literal_0009><term><primary><string><_literal_000f>"</_literal_000f><_repeat_str_item><str_item><str_chr>%{{</str_chr></str_item></_repeat_str_item><_literal_000f>"</_literal_000f></string></primary></term></expr><_literal_000d>;</_literal_000d></expr_list><expr><term><term><primary><number>1</number></primary></term><_literal_000b>*</_literal_000b><primary><number>2</number></primary></term></expr><_literal_000d>;</_literal_000d></expr_list></start>
diff --git a/test/superid.lm b/test/superid.lm
new file mode 100644
index 0000000..eb19020
--- /dev/null
+++ b/test/superid.lm
@@ -0,0 +1,76 @@
+##### LM #####
+context si
+ lex
+ literal `! `a
+
+ token SEMI_NL /';\n'/
+
+ token id /'a'|'b'/
+ {
+ input.push( make_token( trans_id_to input.pull(match_length) ) )
+ }
+
+ token super_id //
+ token foo //
+
+ ignore ws / [ \n\t]+ /
+ end
+
+ trans_id_to: int
+
+ def e1
+ []
+ {
+ print( 'old_id = ' trans_id_to '\n' )
+ trans_id_to = typeid<foo>
+ print( 'new_id = ' trans_id_to '\n' )
+ }
+
+ def item1
+ msg: str
+
+ [ e1 `! `a super_id super_id `a]
+ {
+ lhs.msg = 'this is item1\n'
+ }
+
+ def e2
+ []
+ {
+ print( 'old_id = ' trans_id_to '\n' )
+ trans_id_to = typeid<super_id>
+ print( 'new_id = ' trans_id_to '\n' )
+ }
+
+ def item2
+ msg: str
+
+ [ e2 `! `a super_id super_id `a]
+ {
+ lhs.msg = 'this is item2\n'
+ }
+
+
+ def start
+ [item1 SEMI_NL]
+ | [item2 SEMI_NL]
+ {
+ match lhs [Item2:item2 ';\n']
+ print( Item2.msg )
+ }
+end # si
+
+cons SuperId: si[]
+parse S: si::start(SuperId)[stdin]
+print_xml( S )
+print( '\n' )
+##### IN #####
+!a b b a;
+##### EXP #####
+old_id = NIL
+new_id = 13
+old_id = NIL
+new_id = 12
+this is item2
+<si::start><si::item2><si::e2></si::e2><si::_literal_0001>!</si::_literal_0001><si::_literal_0003>a</si::_literal_0003><si::super_id>b</si::super_id><si::super_id>b</si::super_id><si::_literal_0003>a</si::_literal_0003></si::item2><si::SEMI_NL>;
+</si::SEMI_NL></si::start>
diff --git a/test/tags1.lm b/test/tags1.lm
new file mode 100644
index 0000000..ef17c46
--- /dev/null
+++ b/test/tags1.lm
@@ -0,0 +1,93 @@
+##### LM #####
+context tags
+ # Open and close tags by rewriting to generic close tags. Won't work if
+ # interested in unclosed tags because a token can start as not close_id, but
+ # then become a close id during the course of parsing.
+
+ #
+ # Regular Definitions
+ #
+ rl rl_ws /[ \t\n\r\v]+/
+ rl rl_id /[a-zA-Z_][a-zA-Z0-9_]*/
+
+ #
+ # Tokens
+ #
+
+ # Any single character can be a literal
+ lex
+ token BANG_NL /'!\n'/
+ token SEMI_NL /';\n'/
+
+ # Ignore whitespace.
+ ignore /rl_ws/
+
+ # Open and close id
+ token id /rl_id/
+ end
+
+ #
+ # Global Data
+ #
+
+ def tag_stack
+ [id tag_stack]
+ | []
+
+ TS: tag_stack
+
+ #
+ # Productions
+ #
+
+ def open_tag
+ [id]
+ {
+ match lhs [Id:id]
+ match TS [Top:id Rest:tag_stack]
+ if Id.data == Top.data {
+ reject
+ } else {
+ TS = construct tag_stack [Id TS]
+ }
+ }
+
+ def close_tag
+ [id]
+ {
+ match lhs [Id: id]
+ match TS [Top: id Rest: tag_stack]
+
+ if Id.data == Top.data
+ TS = construct tag_stack [Rest]
+ else
+ reject
+ }
+
+ def tag
+ [open_tag tag* close_tag]
+
+ def start
+ [tag* SEMI_NL]
+ {
+ print_xml( TS )
+ print_xml( lhs )
+ print( 'got structure\n' )
+ }
+
+ | [id* SEMI_NL]
+ {
+ print_xml( TS )
+ print_xml( lhs )
+ print( 'failed\n' )
+ }
+end # tags
+
+cons Tags: tags[]
+Tags.TS = cons tags::tag_stack ["sentinal"]
+parse tags::start(Tags)[stdin]
+##### IN #####
+y y a i i b c c m m n n b a;
+##### EXP #####
+<tags::tag_stack><tags::id>sentinal</tags::id><tags::tag_stack></tags::tag_stack></tags::tag_stack><tags::start><tags::_repeat_tag><tags::tag><tags::open_tag><tags::id>y</tags::id></tags::open_tag><tags::_repeat_tag></tags::_repeat_tag><tags::close_tag><tags::id>y</tags::id></tags::close_tag></tags::tag><tags::tag><tags::open_tag><tags::id>a</tags::id></tags::open_tag><tags::_repeat_tag><tags::tag><tags::open_tag><tags::id>i</tags::id></tags::open_tag><tags::_repeat_tag></tags::_repeat_tag><tags::close_tag><tags::id>i</tags::id></tags::close_tag></tags::tag><tags::tag><tags::open_tag><tags::id>b</tags::id></tags::open_tag><tags::_repeat_tag><tags::tag><tags::open_tag><tags::id>c</tags::id></tags::open_tag><tags::_repeat_tag></tags::_repeat_tag><tags::close_tag><tags::id>c</tags::id></tags::close_tag></tags::tag><tags::tag><tags::open_tag><tags::id>m</tags::id></tags::open_tag><tags::_repeat_tag></tags::_repeat_tag><tags::close_tag><tags::id>m</tags::id></tags::close_tag></tags::tag><tags::tag><tags::open_tag><tags::id>n</tags::id></tags::open_tag><tags::_repeat_tag></tags::_repeat_tag><tags::close_tag><tags::id>n</tags::id></tags::close_tag></tags::tag></tags::_repeat_tag><tags::close_tag><tags::id>b</tags::id></tags::close_tag></tags::tag></tags::_repeat_tag><tags::close_tag><tags::id>a</tags::id></tags::close_tag></tags::tag></tags::_repeat_tag><tags::SEMI_NL>;
+</tags::SEMI_NL></tags::start>got structure
diff --git a/test/tags2.lm b/test/tags2.lm
new file mode 100644
index 0000000..e83b113
--- /dev/null
+++ b/test/tags2.lm
@@ -0,0 +1,4183 @@
+##### LM #####
+#
+# Definitions
+#
+
+rl xml_digit / (0x30..0x39) /
+
+rl base_char / 0x41..0x5A | 0x61..0x7A /
+
+rl char / 0x9 | 0xA | 0xD | 0x20..0x7f /
+
+rl letter / base_char /
+
+rl name_char / letter | digit | '.' | '-' | '_' | ':' | 0xb7 /
+
+rl name / (letter | '_' | ':') name_char* /
+
+#
+# Reference definitions. These appear in the
+# top level and also in strings.
+#
+
+rl entity_ref_pat / '&' name ';' /
+
+rl char_ref_pat / '&#' [0-9]+ ';' | '&0x' [0-9a-fA-F]+ ';' /
+
+#
+# Single quotes.
+#
+lex
+ token sq_close /'\''/
+
+ # References in single quotes
+ token sq_entity_ref /entity_ref_pat/
+ token sq_char_ref /char_ref_pat/
+
+ token sq_data / [^<&']+ /
+
+ def sq_item
+ [ sq_data ]
+ | [ sq_entity_ref ]
+ | [ sq_char_ref ]
+
+ # The opening quote belongs to the tag region.
+ def sq_string
+ [ `' sq_item* sq_close ]
+end
+
+#
+# Double quotes.
+#
+lex
+ token dq_close /'"'/
+
+ # References in double quotes
+ token dq_entity_ref /entity_ref_pat/
+ token dq_char_ref /char_ref_pat/
+
+ token dq_data / [^<&"]+ /
+
+ def dq_item
+ [ dq_data ]
+ | [ dq_entity_ref ]
+ | [ dq_char_ref ]
+
+ # The opening quote belongs to the tag region.
+ def dq_string
+ [ `" dq_item* dq_close ]
+end
+
+#
+# Tag elements.
+#
+lex
+ literal `' `" `= `/
+
+ # Within this region whitespace is not significant.
+ ignore xml_space / (0x20 | 0x9 | 0xD | 0xA)+ /
+
+ #
+ # Attributes
+ #
+ token attr_name / name /
+end
+
+literal `>
+
+#
+# Top Level
+#
+lex
+ #
+ # Comments
+ #
+
+ # Cannot contain '--'
+ rl char_no_dash / char - '-' /
+ token comment / '<!--' ( char_no_dash | '-' char_no_dash )* '-->' /
+
+
+ # Opening a tag.
+ literal `<
+
+ #
+ # Character Data
+ #
+
+ token cdata / '<![CDATA[' char* :> ']]>'/
+ token char_data / [^<&]+ /
+ token entity_ref /entity_ref_pat/
+ token char_ref /char_ref_pat/
+end
+
+
+def attribute_value
+ [ sq_string ]
+| [ dq_string ]
+
+def attribute
+ [ attr_name `= attribute_value ]
+
+def empty_tag
+ [ `< attr_name attribute* `/ `> ]
+
+def close_tag
+ [ `< `/ attr_name `> ]
+
+def open_tag
+ [ `< attr_name attribute* `> ]
+
+def tag
+ [open_tag content close_tag]
+
+def content_item
+ [tag]
+| [empty_tag]
+| [char_data]
+| [entity_ref]
+| [char_ref]
+| [cdata]
+| [comment]
+
+def content
+ [content_item*]
+
+def document
+ [content]
+
+def start
+ [document]
+
+parse S: start[stdin]
+
+for Switch:tag in S {
+ if match Switch
+ ["<lm_switch>" SwitchContent:content "</lm_switch>"]
+ {
+ print( 'SWITCH\n' )
+ for Text:tag in SwitchContent {
+ if match Text
+ ["<text>" TextContent:content "</text>"]
+ {
+ print( ' ' TextContent '\n' )
+ }
+ }
+ }
+}
+##### IN #####
+<ragel version="5.24" filename="../colm/lmscan.rl" lang="C">
+<ragel_def name="rlscan">
+ <alphtype>char</alphtype>
+ <machine>
+ <action_list length="166">
+ <action id="0" name="inc_nl" line="217" col="16"><text>
+ lastnl = p;
+ column = 0;
+ line++;
+ </text></action>
+ <action id="1" name="initts" line="1" col="1"><init_tokstart></init_tokstart></action>
+ <action id="2" name="tokstart" line="1" col="1"><set_tokstart></set_tokstart></action>
+ <action id="3" name="tokend" line="1" col="1"><set_tokend>1</set_tokend></action>
+ <action id="4" name="last1" line="238" col="12"><set_tokend>1</set_tokend><sub_action><text> token( RE_Char, '\0' ); </text></sub_action></action>
+ <action id="5" name="last2" line="239" col="12"><set_tokend>1</set_tokend><sub_action><text> token( RE_Char, '\a' ); </text></sub_action></action>
+ <action id="6" name="last3" line="240" col="12"><set_tokend>1</set_tokend><sub_action><text> token( RE_Char, '\b' ); </text></sub_action></action>
+ <action id="7" name="last4" line="241" col="12"><set_tokend>1</set_tokend><sub_action><text> token( RE_Char, '\t' ); </text></sub_action></action>
+ <action id="8" name="last5" line="242" col="12"><set_tokend>1</set_tokend><sub_action><text> token( RE_Char, '\n' ); </text></sub_action></action>
+ <action id="9" name="last6" line="243" col="12"><set_tokend>1</set_tokend><sub_action><text> token( RE_Char, '\v' ); </text></sub_action></action>
+ <action id="10" name="last7" line="244" col="12"><set_tokend>1</set_tokend><sub_action><text> token( RE_Char, '\f' ); </text></sub_action></action>
+ <action id="11" name="last8" line="245" col="12"><set_tokend>1</set_tokend><sub_action><text> token( RE_Char, '\r' ); </text></sub_action></action>
+ <action id="12" name="last9" line="246" col="13"><set_tokend>1</set_tokend><sub_action><text> updateCol(); </text></sub_action></action>
+ <action id="13" name="last10" line="247" col="15"><set_tokend>1</set_tokend><sub_action><text> token( RE_Char, tokstart+1, tokend ); </text></sub_action></action>
+ <action id="14" name="last11" line="250" col="10"><set_tokend>1</set_tokend><sub_action><text> token( RE_Dash, 0, 0 ); </text></sub_action></action>
+ <action id="15" name="last12" line="253" col="10"><set_tokend>1</set_tokend><sub_action><text> token( RE_SqClose ); </text><ret></ret><text> </text></sub_action></action>
+ <action id="16" name="last13" line="255" col="10"><set_tokend>1</set_tokend><sub_action><text>
+ scan_error() &lt;&lt; "unterminated OR literal" &lt;&lt; endl;
+ </text></sub_action></action>
+ <action id="17" name="last14" line="260" col="12"><set_tokend>1</set_tokend><sub_action><text> token( RE_Char, tokstart, tokend ); </text></sub_action></action>
+ <action id="18" name="store15" line="265" col="13"><set_act>15</set_act></action>
+ <action id="19" name="store16" line="266" col="12"><set_act>16</set_act></action>
+ <action id="20" name="store17" line="267" col="12"><set_act>17</set_act></action>
+ <action id="21" name="store18" line="268" col="13"><set_act>18</set_act></action>
+ <action id="22" name="store19" line="269" col="11"><set_act>19</set_act></action>
+ <action id="23" name="store20" line="270" col="13"><set_act>20</set_act></action>
+ <action id="24" name="store21" line="273" col="12"><set_act>21</set_act></action>
+ <action id="25" name="last24" line="281" col="7"><set_tokend>1</set_tokend><sub_action><text> token( TK_Literal, tokstart, tokend ); </text></sub_action></action>
+ <action id="26" name="last26" line="284" col="11"><set_tokend>1</set_tokend><sub_action><text> token( RE_SqOpenNeg ); </text><call>166</call><text> </text></sub_action></action>
+ <action id="27" name="last27" line="286" col="10"><set_tokend>1</set_tokend><sub_action><text> token( '/'); </text><ret></ret><text> </text></sub_action></action>
+ <action id="28" name="last28" line="289" col="20"><set_tokend>1</set_tokend><sub_action><text> updateCol(); </text></sub_action></action>
+ <action id="29" name="last29" line="291" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_ColonEquals ); </text></sub_action></action>
+ <action id="30" name="last30" line="294" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_StartToState ); </text></sub_action></action>
+ <action id="31" name="last31" line="295" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_AllToState ); </text></sub_action></action>
+ <action id="32" name="last32" line="296" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_FinalToState ); </text></sub_action></action>
+ <action id="33" name="last33" line="297" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_NotStartToState ); </text></sub_action></action>
+ <action id="34" name="last34" line="298" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_NotFinalToState ); </text></sub_action></action>
+ <action id="35" name="last35" line="299" col="12"><set_tokend>1</set_tokend><sub_action><text> token( TK_MiddleToState ); </text></sub_action></action>
+ <action id="36" name="last36" line="302" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_StartFromState ); </text></sub_action></action>
+ <action id="37" name="last37" line="303" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_AllFromState ); </text></sub_action></action>
+ <action id="38" name="last38" line="304" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_FinalFromState ); </text></sub_action></action>
+ <action id="39" name="last39" line="305" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_NotStartFromState ); </text></sub_action></action>
+ <action id="40" name="last40" line="306" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_NotFinalFromState ); </text></sub_action></action>
+ <action id="41" name="last41" line="307" col="12"><set_tokend>1</set_tokend><sub_action><text> token( TK_MiddleFromState ); </text></sub_action></action>
+ <action id="42" name="last42" line="310" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_StartEOF ); </text></sub_action></action>
+ <action id="43" name="last43" line="311" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_AllEOF ); </text></sub_action></action>
+ <action id="44" name="last44" line="312" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_FinalEOF ); </text></sub_action></action>
+ <action id="45" name="last45" line="313" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_NotStartEOF ); </text></sub_action></action>
+ <action id="46" name="last46" line="314" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_NotFinalEOF ); </text></sub_action></action>
+ <action id="47" name="last47" line="315" col="12"><set_tokend>1</set_tokend><sub_action><text> token( TK_MiddleEOF ); </text></sub_action></action>
+ <action id="48" name="last48" line="318" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_StartGblError ); </text></sub_action></action>
+ <action id="49" name="last49" line="319" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_AllGblError ); </text></sub_action></action>
+ <action id="50" name="last50" line="320" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_FinalGblError ); </text></sub_action></action>
+ <action id="51" name="last51" line="321" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_NotStartGblError ); </text></sub_action></action>
+ <action id="52" name="last52" line="322" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_NotFinalGblError ); </text></sub_action></action>
+ <action id="53" name="last53" line="323" col="12"><set_tokend>1</set_tokend><sub_action><text> token( TK_MiddleGblError ); </text></sub_action></action>
+ <action id="54" name="last54" line="326" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_StartLocalError ); </text></sub_action></action>
+ <action id="55" name="last55" line="327" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_AllLocalError ); </text></sub_action></action>
+ <action id="56" name="last56" line="328" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_FinalLocalError ); </text></sub_action></action>
+ <action id="57" name="last57" line="329" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_NotStartLocalError ); </text></sub_action></action>
+ <action id="58" name="last58" line="330" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_NotFinalLocalError ); </text></sub_action></action>
+ <action id="59" name="last59" line="331" col="12"><set_tokend>1</set_tokend><sub_action><text> token( TK_MiddleLocalError ); </text></sub_action></action>
+ <action id="60" name="last61" line="337" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_StartCond ); </text></sub_action></action>
+ <action id="61" name="last62" line="338" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_AllCond ); </text></sub_action></action>
+ <action id="62" name="last63" line="339" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_LeavingCond ); </text></sub_action></action>
+ <action id="63" name="last64" line="341" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_DotDot ); </text></sub_action></action>
+ <action id="64" name="last65" line="342" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_StarStar ); </text></sub_action></action>
+ <action id="65" name="last66" line="343" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_DashDash ); </text></sub_action></action>
+ <action id="66" name="last67" line="344" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_Arrow ); </text></sub_action></action>
+ <action id="67" name="last69" line="347" col="12"><set_tokend>1</set_tokend><sub_action><text> token( TK_ColonGtGt ); </text></sub_action></action>
+ <action id="68" name="last70" line="348" col="12"><set_tokend>1</set_tokend><sub_action><text> token( TK_LtColon ); </text></sub_action></action>
+ <action id="69" name="last72" line="354" col="9"><set_tokend>1</set_tokend><sub_action><text> updateCol(); </text></sub_action></action>
+ <action id="70" name="last73" line="357" col="6"><set_tokend>1</set_tokend></action>
+ <action id="71" name="last74" line="359" col="10"><set_tokend>1</set_tokend><sub_action><text> token( *tokstart ); </text></sub_action></action>
+ <action id="72" name="next21" line="273" col="12"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( TK_Word, tokstart, tokend ); </text></sub_action></action>
+ <action id="73" name="next22" line="276" col="13"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( TK_UInt, tokstart, tokend ); </text></sub_action></action>
+ <action id="74" name="next23" line="277" col="17"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( TK_Hex, tokstart, tokend ); </text></sub_action></action>
+ <action id="75" name="next24" line="281" col="7"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( TK_Literal, tokstart, tokend ); </text></sub_action></action>
+ <action id="76" name="next25" line="283" col="10"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( RE_SqOpen ); </text><call>166</call><text> </text></sub_action></action>
+ <action id="77" name="next60" line="334" col="11"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( TK_Middle ); </text></sub_action></action>
+ <action id="78" name="next68" line="346" col="12"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( TK_ColonGt ); </text></sub_action></action>
+ <action id="79" name="next71" line="351" col="15"><set_tokend>0</set_tokend><hold></hold><sub_action><text> updateCol(); </text></sub_action></action>
+ <action id="80" name="next74" line="359" col="10"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( *tokstart ); </text></sub_action></action>
+ <action id="81" name="lag22" line="276" col="13"><exec><get_tokend></get_tokend></exec><sub_action><text> token( TK_UInt, tokstart, tokend ); </text></sub_action></action>
+ <action id="82" name="switch" line="1" col="1"><lm_switch>
+ <sub_action id="15"><exec><get_tokend></get_tokend></exec><text> token( KW_When ); </text></sub_action>
+ <sub_action id="16"><exec><get_tokend></get_tokend></exec><text> token( KW_Eof ); </text></sub_action>
+ <sub_action id="17"><exec><get_tokend></get_tokend></exec><text> token( KW_Err ); </text></sub_action>
+ <sub_action id="18"><exec><get_tokend></get_tokend></exec><text> token( KW_Lerr ); </text></sub_action>
+ <sub_action id="19"><exec><get_tokend></get_tokend></exec><text> token( KW_To ); </text></sub_action>
+ <sub_action id="20"><exec><get_tokend></get_tokend></exec><text> token( KW_From ); </text></sub_action>
+ <sub_action id="21"><exec><get_tokend></get_tokend></exec><text> token( TK_Word, tokstart, tokend ); </text></sub_action>
+ </lm_switch></action>
+ <action id="83" name="last75" line="363" col="12"><set_tokend>1</set_tokend><sub_action><text> litBuf.append( '\a' ); </text></sub_action></action>
+ <action id="84" name="last76" line="364" col="12"><set_tokend>1</set_tokend><sub_action><text> litBuf.append( '\b' ); </text></sub_action></action>
+ <action id="85" name="last77" line="365" col="12"><set_tokend>1</set_tokend><sub_action><text> litBuf.append( '\t' ); </text></sub_action></action>
+ <action id="86" name="last78" line="366" col="12"><set_tokend>1</set_tokend><sub_action><text> litBuf.append( '\n' ); </text></sub_action></action>
+ <action id="87" name="last79" line="367" col="12"><set_tokend>1</set_tokend><sub_action><text> litBuf.append( '\v' ); </text></sub_action></action>
+ <action id="88" name="last80" line="368" col="12"><set_tokend>1</set_tokend><sub_action><text> litBuf.append( '\f' ); </text></sub_action></action>
+ <action id="89" name="last81" line="369" col="12"><set_tokend>1</set_tokend><sub_action><text> litBuf.append( '\r' ); </text></sub_action></action>
+ <action id="90" name="last82" line="371" col="12"><set_tokend>1</set_tokend><sub_action><text>
+ litBuf.append( tokstart[1] );
+ </text></sub_action></action>
+ <action id="91" name="last83" line="374" col="10"><set_tokend>1</set_tokend><sub_action><text>
+ if ( litBuf.length &gt; 0 ) {
+ token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length );
+ litBuf.clear();
+ }
+ token( '"' );
+ </text><ret></ret><text>
+ </text></sub_action></action>
+ <action id="92" name="last84" line="382" col="9"><set_tokend>1</set_tokend><sub_action><text>
+ if ( litBuf.length &gt; 0 ) {
+ litBuf.append( '\n' );
+ token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length );
+ litBuf.clear();
+ }
+ token( '"' );
+ </text><ret></ret><text>
+ </text></sub_action></action>
+ <action id="93" name="last85" line="391" col="10"><set_tokend>1</set_tokend><sub_action><text>
+ if ( litBuf.length &gt; 0 ) {
+ token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length );
+ litBuf.clear();
+ }
+ token( '[' );
+ </text><call>10</call><text>
+ </text></sub_action></action>
+ <action id="94" name="last86" line="399" col="10"><set_tokend>1</set_tokend><sub_action><text>
+ litBuf.append( *tokstart );
+ </text></sub_action></action>
+ <action id="95" name="store87" line="406" col="12"><set_act>87</set_act></action>
+ <action id="96" name="store88" line="407" col="15"><set_act>88</set_act></action>
+ <action id="97" name="store89" line="408" col="17"><set_act>89</set_act></action>
+ <action id="98" name="store90" line="409" col="15"><set_act>90</set_act></action>
+ <action id="99" name="store91" line="410" col="13"><set_act>91</set_act></action>
+ <action id="100" name="store92" line="411" col="14"><set_act>92</set_act></action>
+ <action id="101" name="store93" line="412" col="18"><set_act>93</set_act></action>
+ <action id="102" name="store94" line="413" col="14"><set_act>94</set_act></action>
+ <action id="103" name="store95" line="414" col="16"><set_act>95</set_act></action>
+ <action id="104" name="store96" line="415" col="16"><set_act>96</set_act></action>
+ <action id="105" name="store97" line="416" col="13"><set_act>97</set_act></action>
+ <action id="106" name="store98" line="417" col="15"><set_act>98</set_act></action>
+ <action id="107" name="store99" line="418" col="16"><set_act>99</set_act></action>
+ <action id="108" name="store101" line="420" col="14"><set_act>101</set_act></action>
+ <action id="109" name="store102" line="421" col="12"><set_act>102</set_act></action>
+ <action id="110" name="store103" line="422" col="12"><set_act>103</set_act></action>
+ <action id="111" name="store104" line="424" col="11"><set_act>104</set_act></action>
+ <action id="112" name="store105" line="425" col="12"><set_act>105</set_act></action>
+ <action id="113" name="store106" line="426" col="15"><set_act>106</set_act></action>
+ <action id="114" name="store107" line="427" col="12"><set_act>107</set_act></action>
+ <action id="115" name="store108" line="428" col="16"><set_act>108</set_act></action>
+ <action id="116" name="store109" line="429" col="18"><set_act>109</set_act></action>
+ <action id="117" name="store110" line="430" col="12"><set_act>110</set_act></action>
+ <action id="118" name="store112" line="432" col="16"><set_act>112</set_act></action>
+ <action id="119" name="store113" line="433" col="17"><set_act>113</set_act></action>
+ <action id="120" name="store114" line="434" col="11"><set_act>114</set_act></action>
+ <action id="121" name="store115" line="435" col="13"><set_act>115</set_act></action>
+ <action id="122" name="store116" line="436" col="15"><set_act>116</set_act></action>
+ <action id="123" name="store117" line="437" col="14"><set_act>117</set_act></action>
+ <action id="124" name="store118" line="438" col="13"><set_act>118</set_act></action>
+ <action id="125" name="store119" line="439" col="18"><set_act>119</set_act></action>
+ <action id="126" name="store120" line="440" col="13"><set_act>120</set_act></action>
+ <action id="127" name="store121" line="441" col="14"><set_act>121</set_act></action>
+ <action id="128" name="store122" line="442" col="12"><set_act>122</set_act></action>
+ <action id="129" name="store123" line="443" col="13"><set_act>123</set_act></action>
+ <action id="130" name="store124" line="444" col="13"><set_act>124</set_act></action>
+ <action id="131" name="store125" line="445" col="13"><set_act>125</set_act></action>
+ <action id="132" name="store126" line="446" col="18"><set_act>126</set_act></action>
+ <action id="133" name="store127" line="447" col="13"><set_act>127</set_act></action>
+ <action id="134" name="store128" line="448" col="11"><set_act>128</set_act></action>
+ <action id="135" name="store129" line="449" col="18"><set_act>129</set_act></action>
+ <action id="136" name="store130" line="450" col="16"><set_act>130</set_act></action>
+ <action id="137" name="store131" line="453" col="12"><set_act>131</set_act></action>
+ <action id="138" name="last133" line="457" col="10"><set_tokend>1</set_tokend><sub_action><text>
+ token( '/' );
+ </text><call>168</call><text>
+ </text></sub_action></action>
+ <action id="139" name="last134" line="462" col="20"><set_tokend>1</set_tokend><sub_action><text>
+ token( '"' );
+ token( TK_LitPat, tokstart+1, tokend );
+ token( '"' );
+ </text></sub_action></action>
+ <action id="140" name="last135" line="468" col="16"><set_tokend>1</set_tokend><sub_action><text>
+ token( TK_Literal, tokstart, tokend );
+ </text></sub_action></action>
+ <action id="141" name="last136" line="472" col="10"><set_tokend>1</set_tokend><sub_action><text>
+ token( '"' );
+ litBuf.clear();
+ </text><call>203</call><text>
+ </text></sub_action></action>
+ <action id="142" name="last137" line="477" col="10"><set_tokend>1</set_tokend><sub_action><text>
+ token( '[' );
+ </text><call>10</call><text>
+ </text></sub_action></action>
+ <action id="143" name="last138" line="482" col="10"><set_tokend>1</set_tokend><sub_action><text>
+ token( ']' );
+ if ( top &gt; 0 )
+ </text><ret></ret><text>
+ </text></sub_action></action>
+ <action id="144" name="last139" line="489" col="20"><set_tokend>1</set_tokend><sub_action><text> updateCol(); </text></sub_action></action>
+ <action id="145" name="last140" line="491" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_ColonEquals ); </text></sub_action></action>
+ <action id="146" name="last141" line="492" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_DoubleArrow ); </text></sub_action></action>
+ <action id="147" name="last142" line="493" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_DoubleEquals ); </text></sub_action></action>
+ <action id="148" name="last143" line="494" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_NotEquals ); </text></sub_action></action>
+ <action id="149" name="last144" line="495" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_DoubleColon ); </text></sub_action></action>
+ <action id="150" name="last145" line="496" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_LessEquals ); </text></sub_action></action>
+ <action id="151" name="last146" line="497" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_GreaterEquals ); </text></sub_action></action>
+ <action id="152" name="last147" line="498" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_LeftArrow ); </text></sub_action></action>
+ <action id="153" name="last148" line="499" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_AmpAmp ); </text></sub_action></action>
+ <action id="154" name="last149" line="500" col="11"><set_tokend>1</set_tokend><sub_action><text> token( TK_BarBar ); </text></sub_action></action>
+ <action id="155" name="last150" line="502" col="43"><set_tokend>1</set_tokend><sub_action><text> token( *tokstart ); </text></sub_action></action>
+ <action id="156" name="last152" line="509" col="9"><set_tokend>1</set_tokend><sub_action><text> updateCol(); </text></sub_action></action>
+ <action id="157" name="last153" line="512" col="6"><set_tokend>1</set_tokend></action>
+ <action id="158" name="last154" line="514" col="10"><set_tokend>1</set_tokend><sub_action><text> token( *tokstart ); </text></sub_action></action>
+ <action id="159" name="next100" line="419" col="12"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( KW_Pri ); </text></sub_action></action>
+ <action id="160" name="next111" line="431" col="14"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( KW_Print ); </text></sub_action></action>
+ <action id="161" name="next131" line="453" col="12"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( TK_Word, tokstart, tokend ); </text></sub_action></action>
+ <action id="162" name="next132" line="455" col="13"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( TK_Number, tokstart, tokend ); </text></sub_action></action>
+ <action id="163" name="next151" line="506" col="15"><set_tokend>0</set_tokend><hold></hold><sub_action><text> updateCol(); </text></sub_action></action>
+ <action id="164" name="next154" line="514" col="10"><set_tokend>0</set_tokend><hold></hold><sub_action><text> token( *tokstart ); </text></sub_action></action>
+ <action id="165" name="switch" line="1" col="1"><lm_switch>
+ <sub_action id="87"><exec><get_tokend></get_tokend></exec><text> token( KW_Lex ); </text></sub_action>
+ <sub_action id="88"><exec><get_tokend></get_tokend></exec><text> token( KW_Action ); </text></sub_action>
+ <sub_action id="89"><exec><get_tokend></get_tokend></exec><text> token( KW_AlphType ); </text></sub_action>
+ <sub_action id="90"><exec><get_tokend></get_tokend></exec><text> token( KW_Commit ); </text></sub_action>
+ <sub_action id="91"><exec><get_tokend></get_tokend></exec><text> token( KW_Undo ); </text></sub_action>
+ <sub_action id="92"><exec><get_tokend></get_tokend></exec><text> token( KW_Final ); </text></sub_action>
+ <sub_action id="93"><exec><get_tokend></get_tokend></exec><text> token( KW_Translate ); </text></sub_action>
+ <sub_action id="94"><exec><get_tokend></get_tokend></exec><text> token( KW_Token ); </text></sub_action>
+ <sub_action id="95"><exec><get_tokend></get_tokend></exec><text> token( KW_Literal ); </text></sub_action>
+ <sub_action id="96"><exec><get_tokend></get_tokend></exec><text> token( KW_NonTerm ); </text></sub_action>
+ <sub_action id="97"><exec><get_tokend></get_tokend></exec><text> token( KW_Uses ); </text></sub_action>
+ <sub_action id="98"><exec><get_tokend></get_tokend></exec><text> token( KW_Parser ); </text></sub_action>
+ <sub_action id="99"><exec><get_tokend></get_tokend></exec><text> token( KW_Include ); </text></sub_action>
+ <sub_action id="101"><exec><get_tokend></get_tokend></exec><text> token( KW_Write ); </text></sub_action>
+ <sub_action id="102"><exec><get_tokend></get_tokend></exec><text> token( KW_Nfa ); </text></sub_action>
+ <sub_action id="103"><exec><get_tokend></get_tokend></exec><text> token( KW_Pda ); </text></sub_action>
+ <sub_action id="104"><exec><get_tokend></get_tokend></exec><text> token( KW_Rl ); </text></sub_action>
+ <sub_action id="105"><exec><get_tokend></get_tokend></exec><text> token( KW_Cfl ); </text></sub_action>
+ <sub_action id="106"><exec><get_tokend></get_tokend></exec><text> token( KW_Ignore ); </text></sub_action>
+ <sub_action id="107"><exec><get_tokend></get_tokend></exec><text> token( KW_End ); </text></sub_action>
+ <sub_action id="108"><exec><get_tokend></get_tokend></exec><text> token( KW_Pattern ); </text></sub_action>
+ <sub_action id="109"><exec><get_tokend></get_tokend></exec><text> token( KW_Construct ); </text></sub_action>
+ <sub_action id="110"><exec><get_tokend></get_tokend></exec><text> token( KW_Red ); </text></sub_action>
+ <sub_action id="112"><exec><get_tokend></get_tokend></exec><text> token( KW_TypeId ); </text></sub_action>
+ <sub_action id="113"><exec><get_tokend></get_tokend></exec><text> token( KW_TypeDef ); </text></sub_action>
+ <sub_action id="114"><exec><get_tokend></get_tokend></exec><text> token( KW_If ); </text></sub_action>
+ <sub_action id="115"><exec><get_tokend></get_tokend></exec><text> token( KW_Init ); </text></sub_action>
+ <sub_action id="116"><exec><get_tokend></get_tokend></exec><text> token( KW_Reject ); </text></sub_action>
+ <sub_action id="117"><exec><get_tokend></get_tokend></exec><text> token( KW_While ); </text></sub_action>
+ <sub_action id="118"><exec><get_tokend></get_tokend></exec><text> token( KW_Else ); </text></sub_action>
+ <sub_action id="119"><exec><get_tokend></get_tokend></exec><text> token( KW_SubParser ); </text></sub_action>
+ <sub_action id="120"><exec><get_tokend></get_tokend></exec><text> token( KW_Next ); </text></sub_action>
+ <sub_action id="121"><exec><get_tokend></get_tokend></exec><text> token( KW_Match ); </text></sub_action>
+ <sub_action id="122"><exec><get_tokend></get_tokend></exec><text> token( KW_For ); </text></sub_action>
+ <sub_action id="123"><exec><get_tokend></get_tokend></exec><text> token( KW_Iter ); </text></sub_action>
+ <sub_action id="124"><exec><get_tokend></get_tokend></exec><text> token( KW_Find ); </text></sub_action>
+ <sub_action id="125"><exec><get_tokend></get_tokend></exec><text> token( KW_Root ); </text></sub_action>
+ <sub_action id="126"><exec><get_tokend></get_tokend></exec><text> token( KW_PrintXML ); </text></sub_action>
+ <sub_action id="127"><exec><get_tokend></get_tokend></exec><text> token( KW_Then ); </text></sub_action>
+ <sub_action id="128"><exec><get_tokend></get_tokend></exec><text> token( KW_Do ); </text></sub_action>
+ <sub_action id="129"><exec><get_tokend></get_tokend></exec><text> token( KW_Namespace ); </text></sub_action>
+ <sub_action id="130"><exec><get_tokend></get_tokend></exec><text> token( KW_Scanner ); </text></sub_action>
+ <sub_action id="131"><exec><get_tokend></get_tokend></exec><text> token( TK_Word, tokstart, tokend ); </text></sub_action>
+ </lm_switch></action>
+ </action_list>
+ <action_table_list length="166">
+ <action_table id="0" length="2">0 144</action_table>
+ <action_table id="1" length="1">0</action_table>
+ <action_table id="2" length="1">140</action_table>
+ <action_table id="3" length="2">0 139</action_table>
+ <action_table id="4" length="2">0 28</action_table>
+ <action_table id="5" length="1">81</action_table>
+ <action_table id="6" length="1">1</action_table>
+ <action_table id="7" length="1">2</action_table>
+ <action_table id="8" length="1">158</action_table>
+ <action_table id="9" length="1">157</action_table>
+ <action_table id="10" length="2">0 156</action_table>
+ <action_table id="11" length="1">141</action_table>
+ <action_table id="12" length="1">3</action_table>
+ <action_table id="13" length="1">155</action_table>
+ <action_table id="14" length="1">138</action_table>
+ <action_table id="15" length="2">3 137</action_table>
+ <action_table id="16" length="1">142</action_table>
+ <action_table id="17" length="1">143</action_table>
+ <action_table id="18" length="1">163</action_table>
+ <action_table id="19" length="1">164</action_table>
+ <action_table id="20" length="1">148</action_table>
+ <action_table id="21" length="1">153</action_table>
+ <action_table id="22" length="1">162</action_table>
+ <action_table id="23" length="1">149</action_table>
+ <action_table id="24" length="1">145</action_table>
+ <action_table id="25" length="1">152</action_table>
+ <action_table id="26" length="1">150</action_table>
+ <action_table id="27" length="1">147</action_table>
+ <action_table id="28" length="1">146</action_table>
+ <action_table id="29" length="1">151</action_table>
+ <action_table id="30" length="1">165</action_table>
+ <action_table id="31" length="1">161</action_table>
+ <action_table id="32" length="2">3 96</action_table>
+ <action_table id="33" length="2">3 97</action_table>
+ <action_table id="34" length="2">3 112</action_table>
+ <action_table id="35" length="2">3 98</action_table>
+ <action_table id="36" length="2">3 116</action_table>
+ <action_table id="37" length="2">3 134</action_table>
+ <action_table id="38" length="2">3 124</action_table>
+ <action_table id="39" length="2">3 114</action_table>
+ <action_table id="40" length="2">3 130</action_table>
+ <action_table id="41" length="2">3 100</action_table>
+ <action_table id="42" length="2">3 128</action_table>
+ <action_table id="43" length="2">3 120</action_table>
+ <action_table id="44" length="2">3 113</action_table>
+ <action_table id="45" length="2">3 107</action_table>
+ <action_table id="46" length="2">3 121</action_table>
+ <action_table id="47" length="2">3 129</action_table>
+ <action_table id="48" length="2">3 95</action_table>
+ <action_table id="49" length="2">3 103</action_table>
+ <action_table id="50" length="2">3 127</action_table>
+ <action_table id="51" length="2">3 135</action_table>
+ <action_table id="52" length="2">3 126</action_table>
+ <action_table id="53" length="2">3 109</action_table>
+ <action_table id="54" length="2">3 104</action_table>
+ <action_table id="55" length="2">3 106</action_table>
+ <action_table id="56" length="2">3 115</action_table>
+ <action_table id="57" length="2">3 110</action_table>
+ <action_table id="58" length="1">159</action_table>
+ <action_table id="59" length="1">160</action_table>
+ <action_table id="60" length="2">3 132</action_table>
+ <action_table id="61" length="2">3 111</action_table>
+ <action_table id="62" length="2">3 117</action_table>
+ <action_table id="63" length="2">3 122</action_table>
+ <action_table id="64" length="2">3 131</action_table>
+ <action_table id="65" length="2">3 136</action_table>
+ <action_table id="66" length="2">3 125</action_table>
+ <action_table id="67" length="2">3 133</action_table>
+ <action_table id="68" length="2">3 102</action_table>
+ <action_table id="69" length="2">3 101</action_table>
+ <action_table id="70" length="2">3 119</action_table>
+ <action_table id="71" length="2">3 118</action_table>
+ <action_table id="72" length="2">3 99</action_table>
+ <action_table id="73" length="2">3 105</action_table>
+ <action_table id="74" length="2">3 123</action_table>
+ <action_table id="75" length="2">3 108</action_table>
+ <action_table id="76" length="1">154</action_table>
+ <action_table id="77" length="1">17</action_table>
+ <action_table id="78" length="1">16</action_table>
+ <action_table id="79" length="1">14</action_table>
+ <action_table id="80" length="1">15</action_table>
+ <action_table id="81" length="1">13</action_table>
+ <action_table id="82" length="1">12</action_table>
+ <action_table id="83" length="1">4</action_table>
+ <action_table id="84" length="1">5</action_table>
+ <action_table id="85" length="1">6</action_table>
+ <action_table id="86" length="1">10</action_table>
+ <action_table id="87" length="1">8</action_table>
+ <action_table id="88" length="1">11</action_table>
+ <action_table id="89" length="1">7</action_table>
+ <action_table id="90" length="1">9</action_table>
+ <action_table id="91" length="1">71</action_table>
+ <action_table id="92" length="1">70</action_table>
+ <action_table id="93" length="2">0 69</action_table>
+ <action_table id="94" length="1">27</action_table>
+ <action_table id="95" length="2">3 24</action_table>
+ <action_table id="96" length="1">79</action_table>
+ <action_table id="97" length="1">75</action_table>
+ <action_table id="98" length="1">25</action_table>
+ <action_table id="99" length="1">80</action_table>
+ <action_table id="100" length="1">49</action_table>
+ <action_table id="101" length="1">37</action_table>
+ <action_table id="102" length="1">43</action_table>
+ <action_table id="103" length="1">61</action_table>
+ <action_table id="104" length="1">55</action_table>
+ <action_table id="105" length="1">31</action_table>
+ <action_table id="106" length="1">50</action_table>
+ <action_table id="107" length="1">38</action_table>
+ <action_table id="108" length="1">44</action_table>
+ <action_table id="109" length="1">62</action_table>
+ <action_table id="110" length="1">56</action_table>
+ <action_table id="111" length="1">32</action_table>
+ <action_table id="112" length="1">64</action_table>
+ <action_table id="113" length="1">65</action_table>
+ <action_table id="114" length="1">66</action_table>
+ <action_table id="115" length="1">63</action_table>
+ <action_table id="116" length="1">73</action_table>
+ <action_table id="117" length="1">74</action_table>
+ <action_table id="118" length="1">29</action_table>
+ <action_table id="119" length="1">78</action_table>
+ <action_table id="120" length="1">67</action_table>
+ <action_table id="121" length="1">51</action_table>
+ <action_table id="122" length="1">39</action_table>
+ <action_table id="123" length="1">45</action_table>
+ <action_table id="124" length="1">68</action_table>
+ <action_table id="125" length="1">57</action_table>
+ <action_table id="126" length="1">33</action_table>
+ <action_table id="127" length="1">77</action_table>
+ <action_table id="128" length="1">53</action_table>
+ <action_table id="129" length="1">41</action_table>
+ <action_table id="130" length="1">47</action_table>
+ <action_table id="131" length="1">59</action_table>
+ <action_table id="132" length="1">35</action_table>
+ <action_table id="133" length="1">48</action_table>
+ <action_table id="134" length="1">36</action_table>
+ <action_table id="135" length="1">42</action_table>
+ <action_table id="136" length="1">60</action_table>
+ <action_table id="137" length="1">54</action_table>
+ <action_table id="138" length="1">30</action_table>
+ <action_table id="139" length="1">52</action_table>
+ <action_table id="140" length="1">40</action_table>
+ <action_table id="141" length="1">46</action_table>
+ <action_table id="142" length="1">58</action_table>
+ <action_table id="143" length="1">34</action_table>
+ <action_table id="144" length="1">82</action_table>
+ <action_table id="145" length="1">76</action_table>
+ <action_table id="146" length="1">26</action_table>
+ <action_table id="147" length="1">72</action_table>
+ <action_table id="148" length="2">3 19</action_table>
+ <action_table id="149" length="2">3 20</action_table>
+ <action_table id="150" length="2">3 23</action_table>
+ <action_table id="151" length="2">3 21</action_table>
+ <action_table id="152" length="2">3 22</action_table>
+ <action_table id="153" length="2">3 18</action_table>
+ <action_table id="154" length="1">94</action_table>
+ <action_table id="155" length="2">0 92</action_table>
+ <action_table id="156" length="1">91</action_table>
+ <action_table id="157" length="1">93</action_table>
+ <action_table id="158" length="1">90</action_table>
+ <action_table id="159" length="1">83</action_table>
+ <action_table id="160" length="1">84</action_table>
+ <action_table id="161" length="1">88</action_table>
+ <action_table id="162" length="1">86</action_table>
+ <action_table id="163" length="1">89</action_table>
+ <action_table id="164" length="1">85</action_table>
+ <action_table id="165" length="1">87</action_table>
+ </action_table_list>
+ <start_state>10</start_state>
+ <entry_points>
+ <entry name="or_literal">166</entry>
+ <entry name="regular_type">168</entry>
+ <entry name="literal_pattern">203</entry>
+ <entry name="main">10</entry>
+ </entry_points>
+ <state_list length="205">
+ <state id="0">
+ <trans_list length="3">
+ <t>-128 9 0 x</t>
+ <t>10 10 10 0</t>
+ <t>11 127 0 x</t>
+ </trans_list>
+ </state>
+
+ <state id="1">
+ <trans_list length="7">
+ <t>-128 9 1 x</t>
+ <t>10 10 1 1</t>
+ <t>11 38 1 x</t>
+ <t>39 39 10 2</t>
+ <t>40 91 1 x</t>
+ <t>92 92 2 x</t>
+ <t>93 127 1 x</t>
+ </trans_list>
+ </state>
+
+ <state id="2">
+ <trans_list length="3">
+ <t>-128 9 1 x</t>
+ <t>10 10 1 1</t>
+ <t>11 127 1 x</t>
+ </trans_list>
+ </state>
+
+ <state id="3">
+ <trans_list length="3">
+ <t>-128 9 3 x</t>
+ <t>10 10 10 3</t>
+ <t>11 127 3 x</t>
+ </trans_list>
+ </state>
+
+ <state id="4">
+ <trans_list length="7">
+ <t>-128 9 4 x</t>
+ <t>10 10 4 1</t>
+ <t>11 33 4 x</t>
+ <t>34 34 171 x</t>
+ <t>35 91 4 x</t>
+ <t>92 92 5 x</t>
+ <t>93 127 4 x</t>
+ </trans_list>
+ </state>
+
+ <state id="5">
+ <trans_list length="3">
+ <t>-128 9 4 x</t>
+ <t>10 10 4 1</t>
+ <t>11 127 4 x</t>
+ </trans_list>
+ </state>
+
+ <state id="6">
+ <trans_list length="3">
+ <t>-128 9 6 x</t>
+ <t>10 10 168 4</t>
+ <t>11 127 6 x</t>
+ </trans_list>
+ </state>
+
+ <state id="7">
+ <trans_list length="7">
+ <t>-128 9 7 x</t>
+ <t>10 10 7 1</t>
+ <t>11 38 7 x</t>
+ <t>39 39 171 x</t>
+ <t>40 91 7 x</t>
+ <t>92 92 8 x</t>
+ <t>93 127 7 x</t>
+ </trans_list>
+ </state>
+
+ <state id="8">
+ <trans_list length="3">
+ <t>-128 9 7 x</t>
+ <t>10 10 7 1</t>
+ <t>11 127 7 x</t>
+ </trans_list>
+ </state>
+
+ <state id="9">
+ <trans_list length="7">
+ <t>-128 47 168 5</t>
+ <t>48 57 181 x</t>
+ <t>58 64 168 5</t>
+ <t>65 70 181 x</t>
+ <t>71 96 168 5</t>
+ <t>97 102 181 x</t>
+ <t>103 127 168 5</t>
+ </trans_list>
+ </state>
+
+ <state id="10" final="t">
+ <state_actions>6 7 x</state_actions>
+ <trans_list length="61">
+ <t>-128 -1 10 8</t>
+ <t>0 0 10 9</t>
+ <t>1 8 10 8</t>
+ <t>9 9 11 x</t>
+ <t>10 10 10 10</t>
+ <t>11 12 10 8</t>
+ <t>13 13 11 x</t>
+ <t>14 31 10 8</t>
+ <t>32 32 11 x</t>
+ <t>33 33 12 x</t>
+ <t>34 34 10 11</t>
+ <t>35 35 13 12</t>
+ <t>36 37 10 8</t>
+ <t>38 38 14 x</t>
+ <t>39 39 15 12</t>
+ <t>40 43 10 13</t>
+ <t>44 44 10 8</t>
+ <t>45 45 10 13</t>
+ <t>46 46 10 8</t>
+ <t>47 47 10 14</t>
+ <t>48 57 16 x</t>
+ <t>58 58 17 x</t>
+ <t>59 59 10 8</t>
+ <t>60 60 18 x</t>
+ <t>61 61 19 x</t>
+ <t>62 62 20 x</t>
+ <t>63 64 10 8</t>
+ <t>65 90 21 15</t>
+ <t>91 91 10 16</t>
+ <t>92 92 10 8</t>
+ <t>93 93 10 17</t>
+ <t>94 94 10 8</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 8</t>
+ <t>97 97 22 x</t>
+ <t>98 98 21 15</t>
+ <t>99 99 33 x</t>
+ <t>100 100 45 x</t>
+ <t>101 101 46 x</t>
+ <t>102 102 50 x</t>
+ <t>103 104 21 15</t>
+ <t>105 105 55 x</t>
+ <t>106 107 21 15</t>
+ <t>108 108 68 x</t>
+ <t>109 109 75 x</t>
+ <t>110 110 79 x</t>
+ <t>111 111 21 15</t>
+ <t>112 112 95 x</t>
+ <t>113 113 21 15</t>
+ <t>114 114 112 x</t>
+ <t>115 115 119 x</t>
+ <t>116 116 132 x</t>
+ <t>117 117 152 x</t>
+ <t>118 118 21 15</t>
+ <t>119 119 157 x</t>
+ <t>120 122 21 15</t>
+ <t>123 123 10 8</t>
+ <t>124 124 164 x</t>
+ <t>125 125 10 8</t>
+ <t>126 126 165 12</t>
+ <t>127 127 10 8</t>
+ </trans_list>
+ </state>
+
+ <state id="11" final="t">
+ <trans_list length="7">
+ <t>-128 8 10 18</t>
+ <t>9 9 11 x</t>
+ <t>10 12 10 18</t>
+ <t>13 13 11 x</t>
+ <t>14 31 10 18</t>
+ <t>32 32 11 x</t>
+ <t>33 127 10 18</t>
+ </trans_list>
+ </state>
+
+ <state id="12" final="t">
+ <trans_list length="3">
+ <t>-128 60 10 19</t>
+ <t>61 61 10 20</t>
+ <t>62 127 10 19</t>
+ </trans_list>
+ </state>
+
+ <state id="13" final="t">
+ <trans_list length="3">
+ <t>-128 9 0 x</t>
+ <t>10 10 10 0</t>
+ <t>11 127 0 x</t>
+ </trans_list>
+ </state>
+
+ <state id="14" final="t">
+ <trans_list length="3">
+ <t>-128 37 10 19</t>
+ <t>38 38 10 21</t>
+ <t>39 127 10 19</t>
+ </trans_list>
+ </state>
+
+ <state id="15" final="t">
+ <trans_list length="7">
+ <t>-128 9 1 x</t>
+ <t>10 10 1 1</t>
+ <t>11 38 1 x</t>
+ <t>39 39 10 2</t>
+ <t>40 91 1 x</t>
+ <t>92 92 2 x</t>
+ <t>93 127 1 x</t>
+ </trans_list>
+ </state>
+
+ <state id="16" final="t">
+ <trans_list length="3">
+ <t>-128 47 10 22</t>
+ <t>48 57 16 x</t>
+ <t>58 127 10 22</t>
+ </trans_list>
+ </state>
+
+ <state id="17" final="t">
+ <trans_list length="5">
+ <t>-128 57 10 19</t>
+ <t>58 58 10 23</t>
+ <t>59 60 10 19</t>
+ <t>61 61 10 24</t>
+ <t>62 127 10 19</t>
+ </trans_list>
+ </state>
+
+ <state id="18" final="t">
+ <trans_list length="5">
+ <t>-128 44 10 19</t>
+ <t>45 45 10 25</t>
+ <t>46 60 10 19</t>
+ <t>61 61 10 26</t>
+ <t>62 127 10 19</t>
+ </trans_list>
+ </state>
+
+ <state id="19" final="t">
+ <trans_list length="4">
+ <t>-128 60 10 19</t>
+ <t>61 61 10 27</t>
+ <t>62 62 10 28</t>
+ <t>63 127 10 19</t>
+ </trans_list>
+ </state>
+
+ <state id="20" final="t">
+ <trans_list length="3">
+ <t>-128 60 10 19</t>
+ <t>61 61 10 29</t>
+ <t>62 127 10 19</t>
+ </trans_list>
+ </state>
+
+ <state id="21" final="t">
+ <trans_list length="9">
+ <t>-128 47 10 30</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 30</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 30</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 30</t>
+ <t>97 122 21 15</t>
+ <t>123 127 10 30</t>
+ </trans_list>
+ </state>
+
+ <state id="22" final="t">
+ <trans_list length="13">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 98 21 15</t>
+ <t>99 99 23 x</t>
+ <t>100 107 21 15</t>
+ <t>108 108 27 x</t>
+ <t>109 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="23" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 24 x</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="24" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 104 21 15</t>
+ <t>105 105 25 x</t>
+ <t>106 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="25" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 110 21 15</t>
+ <t>111 111 26 x</t>
+ <t>112 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="26" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 109 21 15</t>
+ <t>110 110 21 32</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="27" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 111 21 15</t>
+ <t>112 112 28 x</t>
+ <t>113 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="28" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 103 21 15</t>
+ <t>104 104 29 x</t>
+ <t>105 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="29" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 30 x</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="30" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 120 21 15</t>
+ <t>121 121 31 x</t>
+ <t>122 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="31" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 111 21 15</t>
+ <t>112 112 32 x</t>
+ <t>113 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="32" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 21 33</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="33" final="t">
+ <trans_list length="13">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 101 21 15</t>
+ <t>102 102 34 x</t>
+ <t>103 110 21 15</t>
+ <t>111 111 35 x</t>
+ <t>112 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="34" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 107 21 15</t>
+ <t>108 108 21 34</t>
+ <t>109 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="35" final="t">
+ <trans_list length="12">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 108 21 15</t>
+ <t>109 109 36 x</t>
+ <t>110 110 39 x</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="36" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 108 21 15</t>
+ <t>109 109 37 x</t>
+ <t>110 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="37" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 104 21 15</t>
+ <t>105 105 38 x</t>
+ <t>106 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="38" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 21 35</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="39" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 114 21 15</t>
+ <t>115 115 40 x</t>
+ <t>116 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="40" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 41 x</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="41" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 113 21 15</t>
+ <t>114 114 42 x</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="42" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 116 21 15</t>
+ <t>117 117 43 x</t>
+ <t>118 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="43" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 98 21 15</t>
+ <t>99 99 44 x</t>
+ <t>100 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="44" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 21 36</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="45" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 110 21 15</t>
+ <t>111 111 21 37</t>
+ <t>112 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="46" final="t">
+ <trans_list length="13">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 107 21 15</t>
+ <t>108 108 47 x</t>
+ <t>109 109 21 15</t>
+ <t>110 110 49 x</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="47" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 114 21 15</t>
+ <t>115 115 48 x</t>
+ <t>116 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="48" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 21 38</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="49" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 99 21 15</t>
+ <t>100 100 21 39</t>
+ <t>101 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="50" final="t">
+ <trans_list length="13">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 104 21 15</t>
+ <t>105 105 51 x</t>
+ <t>106 110 21 15</t>
+ <t>111 111 54 x</t>
+ <t>112 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="51" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 109 21 15</t>
+ <t>110 110 52 x</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="52" final="t">
+ <trans_list length="12">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 53 x</t>
+ <t>98 99 21 15</t>
+ <t>100 100 21 40</t>
+ <t>101 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="53" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 107 21 15</t>
+ <t>108 108 21 41</t>
+ <t>109 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="54" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 113 21 15</t>
+ <t>114 114 21 42</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="55" final="t">
+ <trans_list length="16">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 101 21 15</t>
+ <t>102 102 21 43</t>
+ <t>103 103 56 x</t>
+ <t>104 109 21 15</t>
+ <t>110 110 60 x</t>
+ <t>111 115 21 15</t>
+ <t>116 116 66 x</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="56" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 109 21 15</t>
+ <t>110 110 57 x</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="57" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 110 21 15</t>
+ <t>111 111 58 x</t>
+ <t>112 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="58" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 113 21 15</t>
+ <t>114 114 59 x</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="59" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 21 44</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="60" final="t">
+ <trans_list length="13">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 98 21 15</t>
+ <t>99 99 61 x</t>
+ <t>100 104 21 15</t>
+ <t>105 105 65 x</t>
+ <t>106 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="61" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 107 21 15</t>
+ <t>108 108 62 x</t>
+ <t>109 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="62" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 116 21 15</t>
+ <t>117 117 63 x</t>
+ <t>118 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="63" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 99 21 15</t>
+ <t>100 100 64 x</t>
+ <t>101 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="64" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 21 45</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="65" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 21 46</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="66" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 67 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="67" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 113 21 15</t>
+ <t>114 114 21 47</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="68" final="t">
+ <trans_list length="13">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 69 x</t>
+ <t>102 104 21 15</t>
+ <t>105 105 70 x</t>
+ <t>106 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="69" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 119 21 15</t>
+ <t>120 120 21 48</t>
+ <t>121 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="70" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 71 x</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="71" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 72 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="72" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 113 21 15</t>
+ <t>114 114 73 x</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="73" final="t">
+ <trans_list length="10">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 74 x</t>
+ <t>98 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="74" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 107 21 15</t>
+ <t>108 108 21 49</t>
+ <t>109 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="75" final="t">
+ <trans_list length="10">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 76 x</t>
+ <t>98 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="76" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 77 x</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="77" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 98 21 15</t>
+ <t>99 99 78 x</t>
+ <t>100 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="78" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 103 21 15</t>
+ <t>104 104 21 50</t>
+ <t>105 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="79" final="t">
+ <trans_list length="15">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 80 x</t>
+ <t>98 100 21 15</t>
+ <t>101 101 87 x</t>
+ <t>102 102 89 x</t>
+ <t>103 110 21 15</t>
+ <t>111 111 90 x</t>
+ <t>112 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="80" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 108 21 15</t>
+ <t>109 109 81 x</t>
+ <t>110 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="81" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 82 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="82" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 114 21 15</t>
+ <t>115 115 83 x</t>
+ <t>116 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="83" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 111 21 15</t>
+ <t>112 112 84 x</t>
+ <t>113 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="84" final="t">
+ <trans_list length="10">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 85 x</t>
+ <t>98 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="85" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 98 21 15</t>
+ <t>99 99 86 x</t>
+ <t>100 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="86" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 21 51</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="87" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 119 21 15</t>
+ <t>120 120 88 x</t>
+ <t>121 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="88" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 21 52</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="89" final="t">
+ <trans_list length="10">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 21 53</t>
+ <t>98 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="90" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 109 21 15</t>
+ <t>110 110 91 x</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="91" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 92 x</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="92" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 93 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="93" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 113 21 15</t>
+ <t>114 114 94 x</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="94" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 108 21 15</t>
+ <t>109 109 21 54</t>
+ <t>110 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="95" final="t">
+ <trans_list length="14">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 96 x</t>
+ <t>98 99 21 15</t>
+ <t>100 100 104 x</t>
+ <t>101 113 21 15</t>
+ <t>114 114 105 x</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="96" final="t">
+ <trans_list length="13">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 113 21 15</t>
+ <t>114 114 97 x</t>
+ <t>115 115 21 15</t>
+ <t>116 116 100 x</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="97" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 114 21 15</t>
+ <t>115 115 98 x</t>
+ <t>116 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="98" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 99 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="99" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 113 21 15</t>
+ <t>114 114 21 55</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="100" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 101 x</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="101" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 102 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="102" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 113 21 15</t>
+ <t>114 114 103 x</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="103" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 109 21 15</t>
+ <t>110 110 21 56</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="104" final="t">
+ <trans_list length="10">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 21 57</t>
+ <t>98 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="105" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 104 21 15</t>
+ <t>105 105 106 x</t>
+ <t>106 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="106" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 58</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 58</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 58</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 58</t>
+ <t>97 109 21 15</t>
+ <t>110 110 107 x</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 58</t>
+ </trans_list>
+ </state>
+
+ <state id="107" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 108 x</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="108" final="t">
+ <trans_list length="9">
+ <t>-128 47 10 59</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 59</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 59</t>
+ <t>95 95 109 x</t>
+ <t>96 96 10 59</t>
+ <t>97 122 21 15</t>
+ <t>123 127 10 59</t>
+ </trans_list>
+ </state>
+
+ <state id="109" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 119 21 15</t>
+ <t>120 120 110 x</t>
+ <t>121 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="110" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 108 21 15</t>
+ <t>109 109 111 x</t>
+ <t>110 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="111" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 107 21 15</t>
+ <t>108 108 21 60</t>
+ <t>109 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="112" final="t">
+ <trans_list length="15">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 113 x</t>
+ <t>102 107 21 15</t>
+ <t>108 108 21 61</t>
+ <t>109 110 21 15</t>
+ <t>111 111 117 x</t>
+ <t>112 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="113" final="t">
+ <trans_list length="13">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 99 21 15</t>
+ <t>100 100 21 62</t>
+ <t>101 105 21 15</t>
+ <t>106 106 114 x</t>
+ <t>107 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="114" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 115 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="115" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 98 21 15</t>
+ <t>99 99 116 x</t>
+ <t>100 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="116" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 21 63</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="117" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 110 21 15</t>
+ <t>111 111 118 x</t>
+ <t>112 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="118" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 21 64</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="119" final="t">
+ <trans_list length="13">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 98 21 15</t>
+ <t>99 99 120 x</t>
+ <t>100 116 21 15</t>
+ <t>117 117 125 x</t>
+ <t>118 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="120" final="t">
+ <trans_list length="10">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 121 x</t>
+ <t>98 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="121" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 109 21 15</t>
+ <t>110 110 122 x</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="122" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 109 21 15</t>
+ <t>110 110 123 x</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="123" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 124 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="124" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 113 21 15</t>
+ <t>114 114 21 65</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="125" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 21 15</t>
+ <t>98 98 126 x</t>
+ <t>99 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="126" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 111 21 15</t>
+ <t>112 112 127 x</t>
+ <t>113 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="127" final="t">
+ <trans_list length="10">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 128 x</t>
+ <t>98 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="128" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 113 21 15</t>
+ <t>114 114 129 x</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="129" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 114 21 15</t>
+ <t>115 115 130 x</t>
+ <t>116 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="130" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 131 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="131" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 113 21 15</t>
+ <t>114 114 21 66</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="132" final="t">
+ <trans_list length="17">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 103 21 15</t>
+ <t>104 104 133 x</t>
+ <t>105 110 21 15</t>
+ <t>111 111 135 x</t>
+ <t>112 113 21 15</t>
+ <t>114 114 138 x</t>
+ <t>115 120 21 15</t>
+ <t>121 121 145 x</t>
+ <t>122 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="133" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 134 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="134" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 109 21 15</t>
+ <t>110 110 21 67</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="135" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 106 21 15</t>
+ <t>107 107 136 x</t>
+ <t>108 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="136" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 137 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="137" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 109 21 15</t>
+ <t>110 110 21 68</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="138" final="t">
+ <trans_list length="10">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 139 x</t>
+ <t>98 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="139" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 109 21 15</t>
+ <t>110 110 140 x</t>
+ <t>111 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="140" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 114 21 15</t>
+ <t>115 115 141 x</t>
+ <t>116 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="141" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 107 21 15</t>
+ <t>108 108 142 x</t>
+ <t>109 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="142" final="t">
+ <trans_list length="10">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 97 143 x</t>
+ <t>98 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="143" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 144 x</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="144" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 21 69</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="145" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 111 21 15</t>
+ <t>112 112 146 x</t>
+ <t>113 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="146" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 147 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="147" final="t">
+ <trans_list length="9">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 148 x</t>
+ <t>96 96 10 31</t>
+ <t>97 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="148" final="t">
+ <trans_list length="13">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 99 21 15</t>
+ <t>100 100 149 x</t>
+ <t>101 104 21 15</t>
+ <t>105 105 151 x</t>
+ <t>106 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="149" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 150 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="150" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 101 21 15</t>
+ <t>102 102 21 70</t>
+ <t>103 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="151" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 99 21 15</t>
+ <t>100 100 21 71</t>
+ <t>101 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="152" final="t">
+ <trans_list length="13">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 109 21 15</t>
+ <t>110 110 153 x</t>
+ <t>111 114 21 15</t>
+ <t>115 115 155 x</t>
+ <t>116 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="153" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 99 21 15</t>
+ <t>100 100 154 x</t>
+ <t>101 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="154" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 110 21 15</t>
+ <t>111 111 21 72</t>
+ <t>112 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="155" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 156 x</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="156" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 114 21 15</t>
+ <t>115 115 21 73</t>
+ <t>116 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="157" final="t">
+ <trans_list length="13">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 103 21 15</t>
+ <t>104 104 158 x</t>
+ <t>105 113 21 15</t>
+ <t>114 114 161 x</t>
+ <t>115 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="158" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 104 21 15</t>
+ <t>105 105 159 x</t>
+ <t>106 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="159" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 107 21 15</t>
+ <t>108 108 160 x</t>
+ <t>109 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="160" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 21 74</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="161" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 104 21 15</t>
+ <t>105 105 162 x</t>
+ <t>106 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="162" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 115 21 15</t>
+ <t>116 116 163 x</t>
+ <t>117 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="163" final="t">
+ <trans_list length="11">
+ <t>-128 47 10 31</t>
+ <t>48 57 21 15</t>
+ <t>58 64 10 31</t>
+ <t>65 90 21 15</t>
+ <t>91 94 10 31</t>
+ <t>95 95 21 15</t>
+ <t>96 96 10 31</t>
+ <t>97 100 21 15</t>
+ <t>101 101 21 75</t>
+ <t>102 122 21 15</t>
+ <t>123 127 10 31</t>
+ </trans_list>
+ </state>
+
+ <state id="164" final="t">
+ <trans_list length="3">
+ <t>-128 123 10 19</t>
+ <t>124 124 10 76</t>
+ <t>125 127 10 19</t>
+ </trans_list>
+ </state>
+
+ <state id="165" final="t">
+ <trans_list length="3">
+ <t>-128 9 3 x</t>
+ <t>10 10 10 3</t>
+ <t>11 127 3 x</t>
+ </trans_list>
+ </state>
+
+ <state id="166" final="t">
+ <state_actions>6 7 x</state_actions>
+ <trans_list length="8">
+ <t>-128 -1 166 77</t>
+ <t>0 0 166 78</t>
+ <t>1 44 166 77</t>
+ <t>45 45 166 79</t>
+ <t>46 91 166 77</t>
+ <t>92 92 167 x</t>
+ <t>93 93 166 80</t>
+ <t>94 127 166 77</t>
+ </trans_list>
+ </state>
+
+ <state id="167" final="t">
+ <trans_list length="18">
+ <t>-128 9 166 81</t>
+ <t>10 10 166 82</t>
+ <t>11 47 166 81</t>
+ <t>48 48 166 83</t>
+ <t>49 96 166 81</t>
+ <t>97 97 166 84</t>
+ <t>98 98 166 85</t>
+ <t>99 101 166 81</t>
+ <t>102 102 166 86</t>
+ <t>103 109 166 81</t>
+ <t>110 110 166 87</t>
+ <t>111 113 166 81</t>
+ <t>114 114 166 88</t>
+ <t>115 115 166 81</t>
+ <t>116 116 166 89</t>
+ <t>117 117 166 81</t>
+ <t>118 118 166 90</t>
+ <t>119 127 166 81</t>
+ </trans_list>
+ </state>
+
+ <state id="168" final="t">
+ <state_actions>6 7 x</state_actions>
+ <trans_list length="47">
+ <t>-128 -1 168 91</t>
+ <t>0 0 168 92</t>
+ <t>1 8 168 91</t>
+ <t>9 9 169 x</t>
+ <t>10 10 168 93</t>
+ <t>11 12 168 91</t>
+ <t>13 13 169 x</t>
+ <t>14 31 168 91</t>
+ <t>32 32 169 x</t>
+ <t>33 33 168 91</t>
+ <t>34 34 170 12</t>
+ <t>35 35 172 12</t>
+ <t>36 36 173 x</t>
+ <t>37 37 174 x</t>
+ <t>38 38 168 91</t>
+ <t>39 39 175 12</t>
+ <t>40 41 168 91</t>
+ <t>42 42 176 x</t>
+ <t>43 44 168 91</t>
+ <t>45 45 177 x</t>
+ <t>46 46 178 x</t>
+ <t>47 47 168 94</t>
+ <t>48 48 179 12</t>
+ <t>49 57 180 x</t>
+ <t>58 58 182 x</t>
+ <t>59 59 168 91</t>
+ <t>60 60 184 x</t>
+ <t>61 61 168 91</t>
+ <t>62 62 186 x</t>
+ <t>63 63 168 91</t>
+ <t>64 64 187 x</t>
+ <t>65 90 188 95</t>
+ <t>91 91 189 x</t>
+ <t>92 94 168 91</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 91</t>
+ <t>97 100 188 95</t>
+ <t>101 101 190 x</t>
+ <t>102 102 193 x</t>
+ <t>103 107 188 95</t>
+ <t>108 108 196 x</t>
+ <t>109 115 188 95</t>
+ <t>116 116 199 x</t>
+ <t>117 118 188 95</t>
+ <t>119 119 200 x</t>
+ <t>120 122 188 95</t>
+ <t>123 127 168 91</t>
+ </trans_list>
+ </state>
+
+ <state id="169" final="t">
+ <trans_list length="7">
+ <t>-128 8 168 96</t>
+ <t>9 9 169 x</t>
+ <t>10 12 168 96</t>
+ <t>13 13 169 x</t>
+ <t>14 31 168 96</t>
+ <t>32 32 169 x</t>
+ <t>33 127 168 96</t>
+ </trans_list>
+ </state>
+
+ <state id="170" final="t">
+ <trans_list length="7">
+ <t>-128 9 4 x</t>
+ <t>10 10 4 1</t>
+ <t>11 33 4 x</t>
+ <t>34 34 171 x</t>
+ <t>35 91 4 x</t>
+ <t>92 92 5 x</t>
+ <t>93 127 4 x</t>
+ </trans_list>
+ </state>
+
+ <state id="171" final="t">
+ <trans_list length="3">
+ <t>-128 104 168 97</t>
+ <t>105 105 168 98</t>
+ <t>106 127 168 97</t>
+ </trans_list>
+ </state>
+
+ <state id="172" final="t">
+ <trans_list length="3">
+ <t>-128 9 6 x</t>
+ <t>10 10 168 4</t>
+ <t>11 127 6 x</t>
+ </trans_list>
+ </state>
+
+ <state id="173" final="t">
+ <trans_list length="13">
+ <t>-128 32 168 99</t>
+ <t>33 33 168 100</t>
+ <t>34 41 168 99</t>
+ <t>42 42 168 101</t>
+ <t>43 46 168 99</t>
+ <t>47 47 168 102</t>
+ <t>48 62 168 99</t>
+ <t>63 63 168 103</t>
+ <t>64 93 168 99</t>
+ <t>94 94 168 104</t>
+ <t>95 125 168 99</t>
+ <t>126 126 168 105</t>
+ <t>127 127 168 99</t>
+ </trans_list>
+ </state>
+
+ <state id="174" final="t">
+ <trans_list length="13">
+ <t>-128 32 168 99</t>
+ <t>33 33 168 106</t>
+ <t>34 41 168 99</t>
+ <t>42 42 168 107</t>
+ <t>43 46 168 99</t>
+ <t>47 47 168 108</t>
+ <t>48 62 168 99</t>
+ <t>63 63 168 109</t>
+ <t>64 93 168 99</t>
+ <t>94 94 168 110</t>
+ <t>95 125 168 99</t>
+ <t>126 126 168 111</t>
+ <t>127 127 168 99</t>
+ </trans_list>
+ </state>
+
+ <state id="175" final="t">
+ <trans_list length="7">
+ <t>-128 9 7 x</t>
+ <t>10 10 7 1</t>
+ <t>11 38 7 x</t>
+ <t>39 39 171 x</t>
+ <t>40 91 7 x</t>
+ <t>92 92 8 x</t>
+ <t>93 127 7 x</t>
+ </trans_list>
+ </state>
+
+ <state id="176" final="t">
+ <trans_list length="3">
+ <t>-128 41 168 99</t>
+ <t>42 42 168 112</t>
+ <t>43 127 168 99</t>
+ </trans_list>
+ </state>
+
+ <state id="177" final="t">
+ <trans_list length="5">
+ <t>-128 44 168 99</t>
+ <t>45 45 168 113</t>
+ <t>46 61 168 99</t>
+ <t>62 62 168 114</t>
+ <t>63 127 168 99</t>
+ </trans_list>
+ </state>
+
+ <state id="178" final="t">
+ <trans_list length="3">
+ <t>-128 45 168 99</t>
+ <t>46 46 168 115</t>
+ <t>47 127 168 99</t>
+ </trans_list>
+ </state>
+
+ <state id="179" final="t">
+ <trans_list length="5">
+ <t>-128 47 168 116</t>
+ <t>48 57 180 x</t>
+ <t>58 119 168 116</t>
+ <t>120 120 9 x</t>
+ <t>121 127 168 116</t>
+ </trans_list>
+ </state>
+
+ <state id="180" final="t">
+ <trans_list length="3">
+ <t>-128 47 168 116</t>
+ <t>48 57 180 x</t>
+ <t>58 127 168 116</t>
+ </trans_list>
+ </state>
+
+ <state id="181" final="t">
+ <trans_list length="7">
+ <t>-128 47 168 117</t>
+ <t>48 57 181 x</t>
+ <t>58 64 168 117</t>
+ <t>65 70 181 x</t>
+ <t>71 96 168 117</t>
+ <t>97 102 181 x</t>
+ <t>103 127 168 117</t>
+ </trans_list>
+ </state>
+
+ <state id="182" final="t">
+ <trans_list length="4">
+ <t>-128 60 168 99</t>
+ <t>61 61 168 118</t>
+ <t>62 62 183 x</t>
+ <t>63 127 168 99</t>
+ </trans_list>
+ </state>
+
+ <state id="183" final="t">
+ <trans_list length="3">
+ <t>-128 61 168 119</t>
+ <t>62 62 168 120</t>
+ <t>63 127 168 119</t>
+ </trans_list>
+ </state>
+
+ <state id="184" final="t">
+ <trans_list length="15">
+ <t>-128 32 168 99</t>
+ <t>33 33 168 121</t>
+ <t>34 41 168 99</t>
+ <t>42 42 168 122</t>
+ <t>43 46 168 99</t>
+ <t>47 47 168 123</t>
+ <t>48 57 168 99</t>
+ <t>58 58 168 124</t>
+ <t>59 61 168 99</t>
+ <t>62 62 185 x</t>
+ <t>63 93 168 99</t>
+ <t>94 94 168 125</t>
+ <t>95 125 168 99</t>
+ <t>126 126 168 126</t>
+ <t>127 127 168 99</t>
+ </trans_list>
+ </state>
+
+ <state id="185" final="t">
+ <trans_list length="11">
+ <t>-128 32 168 127</t>
+ <t>33 33 168 128</t>
+ <t>34 41 168 127</t>
+ <t>42 42 168 129</t>
+ <t>43 46 168 127</t>
+ <t>47 47 168 130</t>
+ <t>48 93 168 127</t>
+ <t>94 94 168 131</t>
+ <t>95 125 168 127</t>
+ <t>126 126 168 132</t>
+ <t>127 127 168 127</t>
+ </trans_list>
+ </state>
+
+ <state id="186" final="t">
+ <trans_list length="13">
+ <t>-128 32 168 99</t>
+ <t>33 33 168 133</t>
+ <t>34 41 168 99</t>
+ <t>42 42 168 134</t>
+ <t>43 46 168 99</t>
+ <t>47 47 168 135</t>
+ <t>48 62 168 99</t>
+ <t>63 63 168 136</t>
+ <t>64 93 168 99</t>
+ <t>94 94 168 137</t>
+ <t>95 125 168 99</t>
+ <t>126 126 168 138</t>
+ <t>127 127 168 99</t>
+ </trans_list>
+ </state>
+
+ <state id="187" final="t">
+ <trans_list length="11">
+ <t>-128 32 168 99</t>
+ <t>33 33 168 139</t>
+ <t>34 41 168 99</t>
+ <t>42 42 168 140</t>
+ <t>43 46 168 99</t>
+ <t>47 47 168 141</t>
+ <t>48 93 168 99</t>
+ <t>94 94 168 142</t>
+ <t>95 125 168 99</t>
+ <t>126 126 168 143</t>
+ <t>127 127 168 99</t>
+ </trans_list>
+ </state>
+
+ <state id="188" final="t">
+ <trans_list length="9">
+ <t>-128 47 168 144</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 144</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 144</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 144</t>
+ <t>97 122 188 95</t>
+ <t>123 127 168 144</t>
+ </trans_list>
+ </state>
+
+ <state id="189" final="t">
+ <trans_list length="3">
+ <t>-128 93 168 145</t>
+ <t>94 94 168 146</t>
+ <t>95 127 168 145</t>
+ </trans_list>
+ </state>
+
+ <state id="190" final="t">
+ <trans_list length="13">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 110 188 95</t>
+ <t>111 111 191 x</t>
+ <t>112 113 188 95</t>
+ <t>114 114 192 x</t>
+ <t>115 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="191" final="t">
+ <trans_list length="11">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 101 188 95</t>
+ <t>102 102 188 148</t>
+ <t>103 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="192" final="t">
+ <trans_list length="11">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 113 188 95</t>
+ <t>114 114 188 149</t>
+ <t>115 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="193" final="t">
+ <trans_list length="11">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 113 188 95</t>
+ <t>114 114 194 x</t>
+ <t>115 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="194" final="t">
+ <trans_list length="11">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 110 188 95</t>
+ <t>111 111 195 x</t>
+ <t>112 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="195" final="t">
+ <trans_list length="11">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 108 188 95</t>
+ <t>109 109 188 150</t>
+ <t>110 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="196" final="t">
+ <trans_list length="11">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 100 188 95</t>
+ <t>101 101 197 x</t>
+ <t>102 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="197" final="t">
+ <trans_list length="11">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 113 188 95</t>
+ <t>114 114 198 x</t>
+ <t>115 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="198" final="t">
+ <trans_list length="11">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 113 188 95</t>
+ <t>114 114 188 151</t>
+ <t>115 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="199" final="t">
+ <trans_list length="11">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 110 188 95</t>
+ <t>111 111 188 152</t>
+ <t>112 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="200" final="t">
+ <trans_list length="11">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 103 188 95</t>
+ <t>104 104 201 x</t>
+ <t>105 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="201" final="t">
+ <trans_list length="11">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 100 188 95</t>
+ <t>101 101 202 x</t>
+ <t>102 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="202" final="t">
+ <trans_list length="11">
+ <t>-128 47 168 147</t>
+ <t>48 57 188 95</t>
+ <t>58 64 168 147</t>
+ <t>65 90 188 95</t>
+ <t>91 94 168 147</t>
+ <t>95 95 188 95</t>
+ <t>96 96 168 147</t>
+ <t>97 109 188 95</t>
+ <t>110 110 188 153</t>
+ <t>111 122 188 95</t>
+ <t>123 127 168 147</t>
+ </trans_list>
+ </state>
+
+ <state id="203" final="t">
+ <state_actions>6 7 x</state_actions>
+ <trans_list length="8">
+ <t>-128 9 203 154</t>
+ <t>10 10 203 155</t>
+ <t>11 33 203 154</t>
+ <t>34 34 203 156</t>
+ <t>35 90 203 154</t>
+ <t>91 91 203 157</t>
+ <t>92 92 204 x</t>
+ <t>93 127 203 154</t>
+ </trans_list>
+ </state>
+
+ <state id="204" final="t">
+ <trans_list length="14">
+ <t>-128 96 203 158</t>
+ <t>97 97 203 159</t>
+ <t>98 98 203 160</t>
+ <t>99 101 203 158</t>
+ <t>102 102 203 161</t>
+ <t>103 109 203 158</t>
+ <t>110 110 203 162</t>
+ <t>111 113 203 158</t>
+ <t>114 114 203 163</t>
+ <t>115 115 203 158</t>
+ <t>116 116 203 164</t>
+ <t>117 117 203 158</t>
+ <t>118 118 203 165</t>
+ <t>119 127 203 158</t>
+ </trans_list>
+ </state>
+ </state_list>
+ </machine>
+</ragel_def>
+<ragel_def name="section_parse">
+ <alphtype>int</alphtype>
+ <machine>
+ <action_list length="5">
+ <action id="0" name="clear_words" line="97" col="21"><text> word = lit = 0; word_len = lit_len = 0; </text></action>
+ <action id="1" name="store_lit" line="98" col="19"><text> lit = tokdata; lit_len = toklen; </text></action>
+ <action id="2" name="incl_err" line="101" col="18"><text> scan_error() &lt;&lt; "bad include statement" &lt;&lt; endl; </text></action>
+ <action id="3" name="handle_include" line="105" col="2"><text>
+ #if 0
+ char *inclSectionName = word;
+ char *inclFileName = 0;
+
+ /* Implement defaults for the input file and section name. */
+ if ( inclSectionName == 0 )
+ inclSectionName = parser-&gt;sectionName;
+
+ if ( lit != 0 )
+ inclFileName = prepareFileName( lit, lit_len );
+ else
+ inclFileName = fileName;
+
+ /* Check for a recursive include structure. Add the current file/section
+ * name then check if what we are including is already in the stack. */
+ includeStack.append( IncludeStackItem( fileName, parser-&gt;sectionName ) );
+
+ if ( recursiveInclude( inclFileName, inclSectionName ) )
+ scan_error() &lt;&lt; "include: this is a recursive include operation" &lt;&lt; endl;
+ else {
+ /* Open the input file for reading. */
+ ifstream *inFile = new ifstream( inclFileName );
+ if ( ! inFile-&gt;is_open() ) {
+ scan_error() &lt;&lt; "include: could not open " &lt;&lt;
+ inclFileName &lt;&lt; " for reading" &lt;&lt; endl;
+ }
+
+ Scanner scanner( inclFileName, *inFile, output, parser,
+ inclSectionName, includeDepth+1 );
+ scanner.do_scan( );
+ delete inFile;
+ }
+
+ /* Remove the last element (len-1) */
+ includeStack.remove( -1 );
+ #endif
+ </text></action>
+ <action id="4" name="handle_token" line="152" col="2"><text>
+ InputLoc loc;
+
+ #ifdef PRINT_TOKENS
+ cerr &lt;&lt; "scanner:" &lt;&lt; line &lt;&lt; ":" &lt;&lt; column &lt;&lt;
+ ": sending token to the parser " &lt;&lt; Parser_lelNames[*p];
+ cerr &lt;&lt; " " &lt;&lt; toklen;
+ if ( tokdata != 0 )
+ cerr &lt;&lt; " " &lt;&lt; tokdata;
+ cerr &lt;&lt; endl;
+ #endif
+
+ loc.fileName = fileName;
+ loc.line = line;
+ loc.col = column;
+
+ parser-&gt;token( loc, type, tokdata, toklen );
+ </text></action>
+ </action_list>
+ <action_table_list length="4">
+ <action_table id="0" length="1">2</action_table>
+ <action_table id="1" length="2">0 1</action_table>
+ <action_table id="2" length="1">3</action_table>
+ <action_table id="3" length="1">4</action_table>
+ </action_table_list>
+ <start_state>3</start_state>
+ <error_state>0</error_state>
+ <entry_points>
+ <entry name="main">3</entry>
+ </entry_points>
+ <state_list length="4">
+ <state id="0">
+ <trans_list length="0">
+ </trans_list>
+ </state>
+
+ <state id="1">
+ <state_actions>x x 0</state_actions>
+ <trans_list length="3">
+ <t>-2147483648 131 x 0</t>
+ <t>132 132 2 1</t>
+ <t>133 2147483647 x 0</t>
+ </trans_list>
+ </state>
+
+ <state id="2">
+ <state_actions>x x 0</state_actions>
+ <trans_list length="3">
+ <t>-2147483648 58 x 0</t>
+ <t>59 59 3 2</t>
+ <t>60 2147483647 x 0</t>
+ </trans_list>
+ </state>
+
+ <state id="3" final="t">
+ <trans_list length="3">
+ <t>-2147483648 128 3 3</t>
+ <t>129 129 1 x</t>
+ <t>130 2147483647 3 3</t>
+ </trans_list>
+ </state>
+ </state_list>
+ </machine>
+</ragel_def>
+<host line="1" col="1">/*
+ * Copyright 2006-2007 Adrian Thurston &lt;thurston@complang.org&gt;
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include &lt;iostream&gt;
+#include &lt;fstream&gt;
+#include &lt;string.h&gt;
+
+#include "colm.h"
+#include "lmscan.h"
+#include "lmparse.h"
+#include "parsedata.h"
+#include "avltree.h"
+#include "vector.h"
+
+//#define PRINT_TOKENS
+
+using std::ifstream;
+using std::istream;
+using std::ostream;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+</host>
+<write def_name="section_parse" line="45" col="2"><arg>data</arg></write>
+<host line="46">
+
+void Scanner::sectionParseInit()
+{
+ </host>
+<write def_name="section_parse" line="50" col="5"><arg>init</arg></write>
+<host line="51">}
+
+ostream &amp;Scanner::scan_error()
+{
+ /* Maintain the error count. */
+ gblErrorCount += 1;
+ cerr &lt;&lt; fileName &lt;&lt; ":" &lt;&lt; line &lt;&lt; ":" &lt;&lt; column &lt;&lt; ": ";
+ return cerr;
+}
+
+bool Scanner::recursiveInclude( char *inclFileName, char *inclSectionName )
+{
+ for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) {
+ if ( strcmp( si-&gt;fileName, inclFileName ) == 0 &amp;&amp;
+ strcmp( si-&gt;sectionName, inclSectionName ) == 0 )
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+void Scanner::updateCol()
+{
+ char *from = lastnl;
+ if ( from == 0 )
+ from = tokstart;
+ //cerr &lt;&lt; "adding " &lt;&lt; tokend - from &lt;&lt; " to column" &lt;&lt; endl;
+ column += tokend - from;
+ lastnl = 0;
+}
+
+void Scanner::token( int type, char c )
+{
+ token( type, &amp;c, &amp;c + 1 );
+}
+
+void Scanner::token( int type )
+{
+ token( type, 0, 0 );
+}
+
+</host>
+<host line="178">
+
+void Scanner::token( int type, char *start, char *end )
+{
+ char *tokdata = 0;
+ int toklen = 0;
+ int *p = &amp;type;
+ int *pe = &amp;type + 1;
+
+ if ( start != 0 ) {
+ toklen = end-start;
+ tokdata = new char[toklen+1];
+ memcpy( tokdata, start, toklen );
+ tokdata[toklen] = 0;
+ }
+
+ </host>
+<write def_name="section_parse" line="196" col="3"><arg>exec</arg></write>
+<host line="197">
+
+ updateCol();
+}
+
+void Scanner::endSection( )
+{
+ /* Execute the eof actions for the section parser. */
+ </host>
+<write def_name="section_parse" line="207" col="3"><arg>eof</arg></write>
+<host line="208">
+}
+
+</host>
+<host line="516">
+
+</host>
+<write def_name="rlscan" line="518" col="4"><arg>data</arg></write>
+<host line="519">
+void Scanner::do_scan()
+{
+ int bufsize = 8;
+ char *buf = new char[bufsize];
+ const char last_char = 0;
+ int cs, act, have = 0;
+ int top, stack[32];
+ bool execute = true;
+
+ sectionParseInit();
+ </host>
+<write def_name="rlscan" line="530" col="5"><arg>init</arg></write>
+<host line="531">
+ while ( execute ) {
+ char *p = buf + have;
+ int space = bufsize - have;
+
+ if ( space == 0 ) {
+ /* We filled up the buffer trying to scan a token. Grow it. */
+ bufsize = bufsize * 2;
+ char *newbuf = new char[bufsize];
+
+ /* Recompute p and space. */
+ p = newbuf + have;
+ space = bufsize - have;
+
+ /* Patch up pointers possibly in use. */
+ if ( tokstart != 0 )
+ tokstart = newbuf + ( tokstart - buf );
+ tokend = newbuf + ( tokend - buf );
+
+ /* Copy the new buffer in. */
+ memcpy( newbuf, buf, have );
+ delete[] buf;
+ buf = newbuf;
+ }
+
+ input.read( p, space );
+ int len = input.gcount();
+
+ /* If we see eof then append the EOF char. */
+ if ( len == 0 ) {
+ p[0] = last_char, len = 1;
+ execute = false;
+ }
+
+ char *pe = p + len;
+ </host>
+<write def_name="rlscan" line="566" col="6"><arg>exec</arg></write>
+<host line="567">
+ /* Check if we failed. */
+ if ( cs == rlscan_error ) {
+ /* Machine failed before finding a token. I'm not yet sure if this
+ * is reachable. */
+ scan_error() &lt;&lt; "scanner error" &lt;&lt; endl;
+ exit(1);
+ }
+
+ /* Decide if we need to preserve anything. */
+ char *preserve = tokstart;
+
+ /* Now set up the prefix. */
+ if ( preserve == 0 )
+ have = 0;
+ else {
+ /* There is data that needs to be shifted over. */
+ have = pe - preserve;
+ memmove( buf, preserve, have );
+ unsigned int shiftback = preserve - buf;
+ if ( tokstart != 0 )
+ tokstart -= shiftback;
+ tokend -= shiftback;
+
+ preserve = buf;
+ }
+ }
+ delete[] buf;
+
+ InputLoc loc;
+ loc.fileName = "&lt;EOF&gt;";
+ loc.line = line;
+ loc.col = 1;
+ parser-&gt;token( loc, _eof, 0, 0 );
+}
+
+void scan( char *fileName, istream &amp;input, ostream &amp;output )
+{
+ Scanner scanner( fileName, input, output, 0, 0, 0 );
+}
+</host>
+</ragel>
+##### EXP #####
+SWITCH
+ token( KW_When );
+ token( KW_Eof );
+ token( KW_Err );
+ token( KW_Lerr );
+ token( KW_To );
+ token( KW_From );
+ token( TK_Word, tokstart, tokend );
+SWITCH
+ token( KW_Lex );
+ token( KW_Action );
+ token( KW_AlphType );
+ token( KW_Commit );
+ token( KW_Undo );
+ token( KW_Final );
+ token( KW_Translate );
+ token( KW_Token );
+ token( KW_Literal );
+ token( KW_NonTerm );
+ token( KW_Uses );
+ token( KW_Parser );
+ token( KW_Include );
+ token( KW_Write );
+ token( KW_Nfa );
+ token( KW_Pda );
+ token( KW_Rl );
+ token( KW_Cfl );
+ token( KW_Ignore );
+ token( KW_End );
+ token( KW_Pattern );
+ token( KW_Construct );
+ token( KW_Red );
+ token( KW_TypeId );
+ token( KW_TypeDef );
+ token( KW_If );
+ token( KW_Init );
+ token( KW_Reject );
+ token( KW_While );
+ token( KW_Else );
+ token( KW_SubParser );
+ token( KW_Next );
+ token( KW_Match );
+ token( KW_For );
+ token( KW_Iter );
+ token( KW_Find );
+ token( KW_Root );
+ token( KW_PrintXML );
+ token( KW_Then );
+ token( KW_Do );
+ token( KW_Namespace );
+ token( KW_Scanner );
+ token( TK_Word, tokstart, tokend );
diff --git a/test/tags3.lm b/test/tags3.lm
new file mode 100644
index 0000000..645b00b
--- /dev/null
+++ b/test/tags3.lm
@@ -0,0 +1,322 @@
+##### LM #####
+context tags
+
+ #
+ # Regular Definitions
+ #
+ rl def_name_char /[\-A-Za-z0-9._:?]/
+ rl def_name /[A-Za-z_:] def_name_char*/
+ rl def_system_literal /'"' [^"]* '"' | "'" [^']* "'"/
+
+ #
+ # Scanner for tag names.
+ #
+ lex
+ ignore /space+/
+ token tag_id /def_name/
+ end
+
+ #
+ # Scanner for attributes names
+ #
+ lex
+ ignore /space+/
+ token attr_name /def_name_char+/
+ literal `=
+ end
+
+ # Scanner for attribute values.
+ lex
+ ignore /space+/
+ token dquote_val /'"' ([^"] | '\\' any)* '"'/
+ token squote_val /"'" ([^'] | '\\' any)* "'"/
+ token unq_val /[^ \t\r\n<>"'] [^ \t\r\n<>]*/
+ end
+
+ literal `> `/>
+
+ #
+ # Tokens
+ #
+
+ lex
+ ignore /space+/
+ literal `< `</ `<!DOCTYPE
+ token doc_data /[^<]+/
+ token comment /"<!--" any* :>> "-->"/
+ end
+
+ #
+ # Tags
+ #
+
+ # This scanner is just for the id in close tags. The id needs to be looked up
+ # in the tag stack so we can determine if it is a stray.
+ lex
+ # Ignore whitespace.
+ ignore /space+/
+
+ token stray_close_id //
+ token close_id /def_name/
+ {
+ # If it is in the tag stack then it is a close_id. If not then it's a
+ # stray_close_id.
+ send_id: int = typeid<stray_close_id>
+
+ LocalTagStack: tag_stack = TagStack
+ for Tag: tag_id in LocalTagStack {
+ T: tag_id = Tag
+ if match_text == T.data {
+ send_id = typeid<close_id>
+ break
+ }
+ }
+
+ input.push( make_token( send_id input.pull(match_length) ) )
+ }
+ end
+
+ #
+ # Tag Stack
+ #
+
+ def tag_stack
+ [tag_id tag_stack]
+ | []
+
+ TagStack: tag_stack
+
+ #
+ # Document Type
+ #
+ # This scanner handles inside DOCTYPE tags (except keywords).
+ lex
+ ignore /space+/
+ token dt_name /def_name/
+ token dt_literal /def_system_literal/
+ token dt_bl /"[" [^\]]* "]"/
+ end
+
+ token dt_close /'>'/
+
+ # Using a separate scanner for the keywords in DOCTYPE prevents them from
+ # covering dt_name
+ lex
+ ignore /space+/
+ literal `SYSTEM `PUBLIC
+ end
+
+ def DOCTYPE [`<!DOCTYPE dt_name external_id dt_bl? dt_close]
+
+ def external_id
+ [`SYSTEM dt_literal?]
+ | [`PUBLIC dt_literal dt_literal?]
+
+ #
+ # Tags, with optionanal close.
+ #
+
+ def tag
+ [open_tag item* opt_close_tag]
+
+ def open_tag
+ [`< tag_id attr* `>]
+ {
+ TagStack = construct tag_stack
+ [r2 TagStack]
+ }
+
+ def opt_close_tag
+ [`</ close_id `>]
+ {
+ match TagStack [Top:tag_id Rest:tag_stack]
+ if r2.data == Top.data
+ TagStack = Rest
+ else
+ reject
+ }
+
+ | []
+ {
+ match TagStack [Top:tag_id Rest:tag_stack]
+ TagStack = Rest
+ }
+
+ #
+ # Empty tags
+ #
+ def empty_tag
+ [`< tag_id attr* `/>]
+
+ #
+ # Stray close tags
+ #
+ def stray_close
+ [`</ stray_close_id `>]
+
+
+ #
+ # Attributes
+ #
+
+ def attr
+ [attr_name eql_attr_val?]
+
+ def eql_attr_val [`= attr_val]
+
+ def attr_val
+ [squote_val]
+ | [dquote_val]
+ | [unq_val]
+ | []
+
+ #
+ # Items
+ #
+
+ def item
+ [DOCTYPE]
+ | [tag]
+ | [empty_tag]
+ | [stray_close]
+ | [doc_data]
+ | [comment]
+
+
+ token trailing /any*/
+
+ def start
+ [item* trailing]
+
+ #
+ # END GRAMMAR
+ #
+
+ int addDefaultAltTags( Start: ref<start> )
+ {
+ for T: open_tag in Start {
+ require T
+ ["<img" AttrList: attr* ">"]
+
+ haveAlt: bool = false
+ for A: attr in T {
+ if match A ["alt=" attr_val]
+ haveAlt = true
+ }
+
+ if !haveAlt {
+ for AL: attr* in T {
+ if match AL [] {
+ AL = construct attr*
+ [" alt=\"default alt\""]
+ break
+ }
+ }
+ }
+ }
+ }
+
+ int printLinks( Start: start )
+ {
+ for A:tag in Start {
+ require A
+ ["<a" AttrList: attr* ">" I: item* "</a>"]
+
+ for Attr: attr in AttrList {
+ if match Attr ["href = " AttrVal: attr_val]
+ print( 'link: ' I '\ntarget: ' AttrVal '\n\n' )
+ }
+ }
+ }
+
+
+ bool should_close( TI: tag_id )
+ {
+ return true
+ }
+
+ bool should_flatten( TI: tag_id )
+ {
+ return true
+ }
+
+ # Finds unclosed tags and puts the content after the tag. Afterwards
+ # all unclosed tags will be empty 'inside'.
+ int flatten( Start: ref<start> )
+ {
+ for TL: item* in Start {
+ require TL
+ [OT: open_tag Inside: item* Trailing: item*]
+
+ match OT
+ ['<' TagId: tag_id attr* '>']
+
+ if should_flatten( TagId )
+ {
+ require Inside
+ [item item*]
+
+ # Put Trailing at the end of inside.
+ for END: item* in Inside {
+ if match END [] {
+ END = Trailing
+ break
+ }
+ }
+
+ EmptyCloseTag: opt_close_tag =
+ construct opt_close_tag []
+
+ # Close the tag and put inside after it.
+ TL = construct item*
+ [OT EmptyCloseTag Inside]
+ }
+ }
+ }
+
+# int close( Start: ref<start> )
+# {
+# for TL: item in Start {
+# require TL
+# [OpenTag: open_tag Inside: item*]
+#
+# match OpenTag
+# ['<' TagId: tag_id attr* '>']
+#
+# if should_close( TagId )
+# {
+# parse CloseId: close_id[ TagId.data ]
+#
+# CloseTag: opt_close_tag =
+# construct opt_close_tag ['</' CloseId '>']
+#
+# # Close the tag and put inside after it.
+# TL = construct item
+# [OpenTag Inside CloseTag]
+# }
+# }
+# }
+end # tags
+
+cons Tags: tags[]
+Tags.TagStack = construct tags::tag_stack []
+
+parse HTML_P: tags::start(Tags)[ stdin ]
+HTML: tags::start = HTML_P
+flatten( HTML )
+print_xml( HTML )
+printLinks( HTML )
+##### IN #####
+<t1>
+
+ <t2>
+ <a href="foo">FOO</a>
+ <t3>
+ </t3>
+
+</t1>
+##### EXP #####
+<tags::start><tags::_repeat_item><tags::item><tags::tag><tags::open_tag><tags::_literal_0019>&lt;</tags::_literal_0019><tags::tag_id>t1</tags::tag_id><tags::_repeat_attr></tags::_repeat_attr><tags::_literal_0013>&gt;</tags::_literal_0013></tags::open_tag><tags::_repeat_item><tags::item><tags::tag><tags::open_tag><tags::_literal_0019>&lt;</tags::_literal_0019><tags::tag_id>t2</tags::tag_id><tags::_repeat_attr></tags::_repeat_attr><tags::_literal_0013>&gt;</tags::_literal_0013></tags::open_tag><tags::_repeat_item></tags::_repeat_item><tags::opt_close_tag></tags::opt_close_tag></tags::tag></tags::item><tags::item><tags::tag><tags::open_tag><tags::_literal_0019>&lt;</tags::_literal_0019><tags::tag_id>a</tags::tag_id><tags::_repeat_attr><tags::attr><tags::attr_name>href</tags::attr_name><tags::_opt_eql_attr_val><tags::eql_attr_val><tags::_literal_0009>=</tags::_literal_0009><tags::attr_val><tags::dquote_val>"foo"</tags::dquote_val></tags::attr_val></tags::eql_attr_val></tags::_opt_eql_attr_val></tags::attr></tags::_repeat_attr><tags::_literal_0013>&gt;</tags::_literal_0013></tags::open_tag><tags::_repeat_item><tags::item><tags::doc_data>FOO</tags::doc_data></tags::item></tags::_repeat_item><tags::opt_close_tag><tags::_literal_001b>&lt;/</tags::_literal_001b><tags::close_id>a</tags::close_id><tags::_literal_0013>&gt;</tags::_literal_0013></tags::opt_close_tag></tags::tag></tags::item><tags::item><tags::tag><tags::open_tag><tags::_literal_0019>&lt;</tags::_literal_0019><tags::tag_id>t3</tags::tag_id><tags::_repeat_attr></tags::_repeat_attr><tags::_literal_0013>&gt;</tags::_literal_0013></tags::open_tag><tags::_repeat_item></tags::_repeat_item><tags::opt_close_tag><tags::_literal_001b>&lt;/</tags::_literal_001b><tags::close_id>t3</tags::close_id><tags::_literal_0013>&gt;</tags::_literal_0013></tags::opt_close_tag></tags::tag></tags::item></tags::_repeat_item><tags::opt_close_tag><tags::_literal_001b>&lt;/</tags::_literal_001b><tags::close_id>t1</tags::close_id><tags::_lite
ral_0013>&gt;</tags::_literal_0013></tags::opt_close_tag></tags::tag></tags::item></tags::_repeat_item><tags::trailing>
+</tags::trailing></tags::start>link: FOO
+target: "foo"
+
diff --git a/test/tags4.lm b/test/tags4.lm
new file mode 100644
index 0000000..f710378
--- /dev/null
+++ b/test/tags4.lm
@@ -0,0 +1,350 @@
+##### LM #####
+#
+#
+# This is somewhat broken. missing_close_id is cuasing close ids to be parseed
+# when they shouldn't. Maybe remove it.
+#
+#
+
+context tags
+ #
+ # Regular Definitions
+ #
+ rl def_name_char /[\-A-Za-z0-9._:?]/
+ rl def_name /[A-Za-z_:] def_name_char*/
+ rl def_system_literal /'"' [^"]* '"' | "'" [^']* "'"/
+
+ #
+ # Scanner for tag names.
+ #
+ lex
+ ignore /space+/
+ token tag_id /def_name/
+ end
+
+ #
+ # Scanner for attributes names
+ #
+ lex
+ ignore /space+/
+ token attr_name /def_name_char+/
+ literal `=
+ end
+
+ literal `> `/>
+
+ # Scanner for attribute values.
+ lex
+ ignore /space+/
+ token dquote_val /'"' ([^"] | '\\' any)* '"'/
+ token squote_val /"'" ([^'] | '\\' any)* "'"/
+ token unq_val /[^ \t\r\n<>"'] [^ \t\r\n<>]*/
+ end
+
+ #
+ # Tokens
+ #
+
+ lex
+ ignore /space+/
+
+ literal `< `</ `<!DOCTYPE
+ token close_tag
+ /'</' [\t ]* [a-zA-Z]+ '>'/
+
+ token doc_data /[^<]+/
+ token comment /'<!--' any* :>> '-->'/
+ end
+
+ #
+ # Tags
+ #
+
+ bool inTagStack( id: str )
+ {
+ LocalTagStack: tag_stack = TagStack
+ for Tag: tag_id in LocalTagStack {
+ if id == Tag.data
+ return true
+ }
+ return false
+ }
+
+ # This scanner is just for the id in close tags. The id needs to be looked up
+ # in the tag stack so we can determine if it is a stray.
+ lex
+ # Ignore whitespace.
+ ignore /space+/
+
+ token stray_close_id //
+ token missing_close_id //
+
+ token close_id /def_name/
+ {
+ # If it is in the tag stack then it is a close_id. If not then it's a
+ # stray_close_id.
+ send_id: int = typeid<stray_close_id>
+
+ if ( inTagStack( match_text ) ) {
+ print( 'CLOSE \'' match_text '\' IN TAG STACK\n' )
+
+ # The tag is in the stack, send missing close tags until we get to it.
+ match TagStack [Top:tag_id Rest:tag_stack]
+ TagStack = Rest
+ while ( Top.data != match_text ) {
+ print( 'SENDING missing close\n' )
+ input.push( make_token( typeid<missing_close_id> '' ) )
+ match TagStack [Top2:tag_id Rest2:tag_stack]
+ Top = Top2
+ TagStack = Rest2
+ }
+
+ print( 'SENDING close\n' )
+ input.push( make_token( typeid<close_id> input.pull( match_length ) ) )
+ }
+ else {
+ print( 'CLOSE \'' match_text '\' NOT IN TAG STACK\n' )
+ # The tag is not in the tag stack so send the id as a stray close.
+ input.push( make_token( typeid<stray_close> input.pull( match_length ) ) )
+ }
+ }
+ end
+
+ #
+ # Tag Stack
+ #
+
+ def tag_stack
+ [tag_id tag_stack]
+ | []
+
+ TagStack: tag_stack
+
+ #
+ # Document Type
+ #
+ # This scanner handles inside DOCTYPE tags (except keywords).
+ lex
+ ignore /space+/
+ token dt_name /def_name/
+ token dt_literal /def_system_literal/
+ token dt_bl /"[" [^\]]* "]"/
+ token dt_close /'>'/
+ end
+
+ # Using a separate scanner for the keywords in DOCTYPE prevents them from
+ # covering dt_name
+ lex
+ ignore /space+/
+ literal `SYSTEM `PUBLIC
+ end
+
+ def DOCTYPE [`<!DOCTYPE dt_name external_id dt_bl? dt_close]
+
+ def external_id
+ [`SYSTEM dt_literal?]
+ | [`PUBLIC dt_literal dt_literal?]
+
+ #
+ # Tags, with optionanal close.
+ #
+
+ def tag
+ [open_tag item* close_tag]
+
+ def unclosed_tag
+ [open_tag item* missing_close_id]
+
+ def open_tag
+ [`< tag_id attr* `>]
+ {
+ TagStack = construct tag_stack
+ [r2 TagStack]
+ }
+
+ #
+ # Empty tags
+ #
+ def empty_tag
+ [`< tag_id attr* `/>]
+
+ #
+ # Stray close tags
+ #
+ def stray_close
+ [close_tag]
+
+
+ #
+ # Attributes
+ #
+
+ def attr
+ [attr_name eql_attr_val?]
+
+ def eql_attr_val [`= attr_val]
+
+ def attr_val
+ [squote_val]
+ | [dquote_val]
+ | [unq_val]
+ | []
+
+ #
+ # Items
+ #
+
+ def item
+ [DOCTYPE]
+ | [tag]
+ | [unclosed_tag]
+ | [empty_tag]
+ | [stray_close]
+ | [doc_data]
+ | [comment]
+
+
+ token trailing /any*/
+
+ def start
+ [item* trailing]
+
+ #
+ # END GRAMMAR
+ #
+
+ int addDefaultAltTags( Start: ref<start> )
+ {
+ for T: open_tag in Start {
+ require T
+ ["<img" AttrList: attr* '>']
+
+ haveAlt: bool = false
+ for A: attr in T {
+ if match A ["alt=" attr_val]
+ haveAlt = true
+ }
+
+ if !haveAlt {
+ for AL: attr* in T {
+ if match AL [] {
+ AL = construct attr*
+ [" alt=\"default alt\""]
+ break
+ }
+ }
+ }
+ }
+ }
+
+ int printLinks( Start: start )
+ {
+ for A:tag in Start {
+ require A
+ ["<a" AttrList: attr* ">" I: item* "</a>"]
+
+ for Attr: attr in AttrList {
+ if match Attr ["href = " AttrVal: attr_val]
+ print( 'link: ' I '\ntarget: ' AttrVal '\n\n' )
+ }
+ }
+ }
+
+
+ bool should_close( TI: tag_id )
+ {
+ return true
+ }
+
+ bool should_flatten( TI: tag_id )
+ {
+ return true
+ }
+end # tags
+
+# Finds unclosed tags and puts the content after the tag. Afterwards
+# all unclosed tags will be empty 'inside'.
+#int flatten( Start: ref<start> )
+#{
+# for TL: item* in Start {
+# require TL
+# [OT: open_tag Inside: item* Trailing: item*]
+#
+# match OT
+# ['<' TagId: tag_id attr* '>']
+#
+# if should_flatten( TagId )
+# {
+# require Inside
+# [item item*]
+#
+# # Put Trailing at the end of inside.
+# for END: item* in Inside {
+# if match END [] {
+# END = Trailing
+# break
+# }
+# }
+#
+# str empty = ''
+# missing_close_id Missing = construct missing_close_id [empty]
+# opt_close_tag EmptyCloseTag =
+# construct opt_close_tag [Missing]
+#
+# # Close the tag and put inside after it.
+# TL = construct item*
+# [OT EmptyCloseTag Inside]
+# }
+# }
+#}
+#
+#int close( Start: ref<start> )
+#{
+# for TL: item in Start {
+# require TL
+# [OpenTag: open_tag Inside: item*]
+#
+# match OpenTag
+# ['<' TagId: tag_id attr* '>']
+#
+# if should_close( TagId )
+# {
+# close_id CloseId = construct close_id
+# [TagId.data]
+#
+# opt_close_tag CloseTag =
+# construct opt_close_tag ['</' CloseId '>']
+#
+# # Close the tag and put inside after it.
+# TL = construct item
+# [OpenTag Inside CloseTag]
+# }
+# }
+#}
+
+cons Tags: tags[]
+Tags.TagStack = construct tags::tag_stack []
+parse HTML: tags::start(Tags)[ stdin ]
+print( HTML )
+
+#print_xml( HTML )
+#for C: close_tag in HTML
+# print( C '\n' )
+##### IN #####
+<t1>
+
+ <t2>
+ <a href="foo">&FOO</a>
+ <t3>
+ </t3>
+
+</t1>
+##### EXP #####
+<t1>
+
+ <t2>
+ <a href="foo">&FOO</a>
+ <t3>
+ </t3>
+
+</t1>
diff --git a/test/tcontext1.lm b/test/tcontext1.lm
new file mode 100644
index 0000000..86db718
--- /dev/null
+++ b/test/tcontext1.lm
@@ -0,0 +1,35 @@
+##### LM #####
+
+lex
+ token tIDENTIFIER /[a-z][a-zA-Z_]*/ -ni
+ ignore /[ \t\n]+/
+ ignore comment /'#' [^\n]* '\n'/
+end
+
+lex
+ ignore /[\t ]+/
+ ignore /'#' [^\n]*/
+ literal `;
+ token NL /'\n'/
+end
+
+# Required whitespace, but newline is not allowed.
+token ws_no_nl
+ /[ \t]+ [^ \t\n]/
+ {
+ input.push( make_token( typeid<ws_no_nl> input.pull(match_length-1) ) )
+ }
+
+def method_call
+ [tIDENTIFIER ws_no_nl tIDENTIFIER `; NL]
+
+parse R: method_call[stdin]
+
+print_xml( R )
+print( '\n' )
+
+##### IN #####
+a bc;
+##### EXP #####
+<method_call><tIDENTIFIER>a</tIDENTIFIER><ws_no_nl> </ws_no_nl><tIDENTIFIER>bc</tIDENTIFIER><_literal_000b>;</_literal_000b><NL>
+</NL></method_call>
diff --git a/test/til.lm b/test/til.lm
new file mode 100644
index 0000000..6a93ab3
--- /dev/null
+++ b/test/til.lm
@@ -0,0 +1,194 @@
+##### LM #####
+lex
+ literal `var `if `then `else `while `do `for `read `write
+ `end `to `goto
+
+ literal `:= `!= `; `+ `- `* `/ `= `( `) `:
+
+ ignore /'//' [^\n]* '\n'/
+ ignore /[\n\t ]+/
+ token id /[a-zA-Z_]+/
+ token integernumber /[0-9]+/
+ token stringlit /'"' [^"]* '"'/
+end
+
+def program
+ [statement*]
+
+def statement
+ [declaration]
+| [assignment_statement]
+| [if_statement]
+| [while_statement]
+| [do_statement]
+| [for_statement]
+| [read_statement]
+| [write_statement]
+| [labelled_statement]
+| [goto_statement]
+
+def declaration
+ [`var id `;]
+
+def assignment_statement
+ [id `:= expression `;]
+
+def if_statement
+ [`if expression `then statement* opt_else_statement `end]
+
+def opt_else_statement
+ [`else statement*]
+| []
+
+def while_statement
+ [`while expression `do statement* `end]
+
+def do_statement
+ [`do statement* `while expression `;]
+
+def for_statement
+ [`for id `:= expression `to expression `do statement* `end]
+
+def read_statement
+ [`read id `;]
+
+def write_statement
+ [`write expression `;]
+
+def expression
+ [term]
+| [expression eqop term]
+
+def eqop [`=] | [`!=]
+
+def term
+ [factor]
+| [term addop factor]
+
+def addop [`+] | [`-]
+
+def factor
+ [primary]
+| [factor mulop primary]
+
+def mulop [`*] | [`/]
+
+def primary
+ [id]
+| [lit]
+| [`( expression `)]
+
+def lit
+ [integernumber]
+| [stringlit]
+
+def labelled_statement
+ [id `: statement]
+
+def goto_statement
+ [`goto id `;]
+
+parse P: program[stdin]
+
+#for S:statement* in P
+#{
+# if match S [L0: id ':'
+# First: statement
+# Rest: statement*]
+# {
+# for Check: statement* in Rest
+# {
+# if match Check
+# ['if' E: expression 'then'
+# 'goto' Targ: id ';'
+# 'end'
+# T: statement*]
+# {
+# # This truncates Rest
+# Check = construct statement* []
+#
+# # Replace the labeled statement through to the goto with a
+# # do ... while.
+# S = construct statement*
+# ['do'
+# First
+# Rest
+# 'while' E ';'
+# T]
+# break
+# }
+# }
+# }
+#}
+
+for S: statement* in P
+{
+ if match S [Label: id `:
+ First: statement
+ Rest: statement*]
+ {
+ Expr: expression
+ Following: statement*
+
+	# Look through the remaining statements for a goto back to the label.
+ # The repeat iterator yields only top-level statement lists. It
+ # restricts our search to the same nesting depth as the label.
+ for Check: statement* in Rest
+ {
+ if match Check
+ [`if E: expression `then
+ `goto L:id `;
+ `end
+ SL: statement*]
+ {
+ Expr = E
+ Following = SL
+
+ # Check iterates over tails of Rest. Assigning an empty list
+			# to Check truncates the Rest list. What we cut off is saved in
+ # Following (excluding the if statement).
+ Check = construct statement* []
+ }
+ }
+
+ # If a goto was found, then perform the rewrite.
+ if ( Expr )
+ {
+ # Replace the labelled statement through to the goto
+ # with a do ... while.
+ S = construct statement* [
+ "do
+ " [^First]
+ " [^Rest]
+ "while [^Expr];
+ Following]
+ }
+ }
+}
+
+print( P )
+##### IN #####
+
+var a;
+a := 1;
+
+head:
+
+a := a + 1;
+c := d;
+
+if a = 10 then
+ goto head;
+end
+
+hi := there;
+##### EXP #####
+
+var a;
+a := 1;
+
+do
+ a := a + 1;
+ c := d;
+while a = 10;
+hi := there;
diff --git a/test/translate1.lm b/test/translate1.lm
new file mode 100644
index 0000000..4403ca6
--- /dev/null
+++ b/test/translate1.lm
@@ -0,0 +1,28 @@
+##### LM #####
+lex
+ ignore /space+/
+ literal `* `( `)
+ token id /[a-zA-Z_]+/
+ {
+ t: str = input.pull( match_length )
+ input.push( make_token( typeid<id> t ) )
+ }
+end
+
+def foo [id]
+
+def item
+ [id]
+| [foo]
+| [`( item* `)]
+
+def start
+ [item*]
+
+parse Input: start[ stdin ]
+print( Input )
+
+##### IN #####
+a b ( c d ) e f
+##### EXP #####
+a b ( c d ) e f
diff --git a/test/translate2.lm b/test/translate2.lm
new file mode 100644
index 0000000..47bda35
--- /dev/null
+++ b/test/translate2.lm
@@ -0,0 +1,62 @@
+##### LM #####
+lex
+ ignore /space+/
+ literal `# `{ `}
+ token id2 /[a-zA-Z_]+/
+end
+
+def item2
+ [id2]
+| [`{ item2* `}]
+
+def start2
+ [item2*]
+
+context ctx
+
+ lex
+ ignore /space+/
+ literal `* `( `) `!
+ token SEMI_NL /';\n'/
+ token id /[a-zA-Z_0-9]+/
+
+ token ddd /'...'/ {
+ print('translating\n')
+ input.pull( match_length )
+ input.push( make_token( typeid<id> "dot" ) )
+ input.push( make_token( typeid<id> "dot" ) )
+ input.push( make_token( typeid<id> "dot" ) )
+ }
+ end
+
+ def item
+ [id]
+ | [`( item* `)]
+
+ def A [] {
+ print( 'A\n' )
+ }
+
+ def B [] {
+ print( 'B\n' )
+ }
+
+
+ def start
+ [A item* `!]
+ | [B item* SEMI_NL]
+
+end # ctx
+
+CTX: ctx = cons ctx []
+parse Input: ctx::start( CTX ) [ stdin ]
+print( Input )
+
+##### IN #####
+a b c ( d1 ... d2 ) e f g ;
+##### EXP #####
+A
+translating
+B
+translating
+a b c ( d1 dotdotdot d2 ) e f g ;
diff --git a/test/travs1.lm b/test/travs1.lm
new file mode 100644
index 0000000..c2f7171
--- /dev/null
+++ b/test/travs1.lm
@@ -0,0 +1,286 @@
+##### LM #####
+lex
+ ignore /[\t\n ]+/
+ literal `^ `| `- `, `: `! `? `.
+ literal `( `) `{ `} `* `& `+
+
+ literal `-- `:> `:>> `<: `-> `**
+
+ token word /[a-zA-Z_][a-zA-Z0-9_]*/
+ token uint /[0-9]+/
+end
+
+
+def start [expression]
+
+def expression [term expression_op*]
+
+def expression_op
+ [`| term]
+| [`& term]
+| [`- term]
+| [`-- term]
+
+def term [factor_rep term_rest]
+
+# This list is done manually to get shortest match.
+def term_rest
+ []
+| [term_op term_rest]
+
+def term_op
+ [factor_rep]
+| [`. factor_rep]
+| [`:> factor_rep]
+| [`:>> factor_rep]
+| [`<: factor_rep]
+
+def factor_rep
+ [factor_neg factor_rep_op*]
+
+def factor_rep_op
+ [`*]
+| [`**]
+| [`?]
+| [`+]
+| [`{ factor_rep_num `}]
+| [`{ `, factor_rep_num `}]
+| [`{ factor_rep_num `, `}]
+| [`{ factor_rep_num `, factor_rep_num `}]
+
+def factor_rep_num [uint]
+
+def factor_neg
+ [`! factor_neg]
+| [`^ factor_neg]
+| [factor]
+
+def factor
+ [alphabet_num]
+| [word]
+| [`( expression `)]
+
+def alphabet_num
+ [uint]
+
+parse S: start[stdin]
+
+#
+# Top-Down, Left-Right
+#
+
+int do_topdown_leftright( T: ref<any> )
+{
+ for C:any in child(T) {
+ yield C
+ do_topdown_leftright( C )
+ }
+}
+
+iter topdown_leftright( T: ref<any> )
+{
+ do_topdown_leftright( T )
+}
+
+#
+# Bottom-Up, Left-Right
+#
+
+int do_bottomup_leftright( T: ref<any> )
+{
+ for C:any in child(T) {
+ do_bottomup_leftright( C )
+ yield C
+ }
+}
+
+iter bottomup_leftright( T: ref<any> )
+{
+ do_bottomup_leftright( T )
+}
+
+
+#
+# Top-Down, Right-Left
+#
+
+int do_topdown_rightleft( T: ref<any> )
+{
+ for C:any in rev_child(T) {
+ yield C
+ do_topdown_rightleft( C )
+ }
+}
+
+iter topdown_rightleft( T: ref<any> )
+{
+ do_topdown_rightleft( T )
+}
+
+#
+# Bottom-Up, Right-Left
+#
+
+int do_bottomup_rightleft( T: ref<any> )
+{
+ for C:any in rev_child(T) {
+ do_bottomup_rightleft( C )
+ yield C
+ }
+}
+
+iter bottomup_rightleft( T: ref<any> )
+{
+ do_bottomup_rightleft( T )
+}
+
+#
+# Testing
+#
+
+print( 'bottomup_leftright\n' )
+for T1: any in bottomup_leftright( S )
+{
+ print( ^T1 '\n' )
+}
+
+print( 'bottomup_rightleft\n' )
+for T2: any in bottomup_rightleft( S )
+{
+ print( ^T2 '\n' )
+}
+
+print( 'topdown_leftright\n' )
+for T3: any in topdown_leftright( S )
+{
+ print( ^T3 '\n' )
+}
+
+print( 'topdown_rightleft\n' )
+for T4: any in topdown_rightleft( S )
+{
+ print( ^T4 '\n' )
+}
+##### IN #####
+1 | 2 3
+##### EXP #####
+bottomup_leftright
+1
+1
+1
+1
+
+1
+
+1
+|
+2
+2
+2
+2
+
+2
+3
+3
+3
+3
+
+3
+3
+
+3
+2 3
+| 2 3
+
+| 2 3
+1 | 2 3
+bottomup_rightleft
+
+
+
+3
+3
+3
+3
+3
+3
+3
+
+2
+2
+2
+2
+2
+2 3
+|
+| 2 3
+| 2 3
+
+
+1
+1
+1
+1
+1
+1
+1 | 2 3
+topdown_leftright
+1 | 2 3
+1
+1
+1
+1
+1
+1
+
+
+| 2 3
+| 2 3
+|
+2 3
+2
+2
+2
+2
+2
+
+3
+3
+3
+3
+3
+3
+3
+
+
+
+topdown_rightleft
+1 | 2 3
+| 2 3
+
+| 2 3
+2 3
+3
+
+3
+3
+
+3
+3
+3
+3
+2
+
+2
+2
+2
+2
+|
+1
+
+1
+
+1
+1
+1
+1
diff --git a/test/treecmp1.lm b/test/treecmp1.lm
new file mode 100644
index 0000000..3bd5b23
--- /dev/null
+++ b/test/treecmp1.lm
@@ -0,0 +1,25 @@
+##### LM #####
+rl ident_pattern /[a-zA-Z_][a-zA-Z_0-9]*/
+rl number_pattern /[0-9]+/
+
+lex
+ ignore /[ \t\n]+/
+ token id /ident_pattern/
+ token number /number_pattern/
+end
+
+def four_ids
+ [id id id id]
+
+B: id = construct id "b"
+
+parse Input: four_ids[ stdin ]
+
+for Id: id in Input {
+ if ( Id == B )
+ print( B '\n' )
+}
+##### IN #####
+a b c d
+##### EXP #####
+b
diff --git a/test/typeref1.lm b/test/typeref1.lm
new file mode 100644
index 0000000..de1fa26
--- /dev/null
+++ b/test/typeref1.lm
@@ -0,0 +1,33 @@
+##### LM #####
+namespace n1
+
+ namespace n2
+ lex
+ token id / 'a' .. 'z' /
+ ignore / '\n' | '\t' | ' ' /
+ end
+
+ def start
+ [id*]
+ end
+end
+
+parse P: n1::n2::id*[stdin]
+print( P )
+##### IN #####
+
+##### EXP #####
+##### IN #####
+a
+##### EXP #####
+a
+##### IN #####
+a
+ b
+ c
+d
+##### EXP #####
+a
+ b
+ c
+d
diff --git a/test/typeref2.lm b/test/typeref2.lm
new file mode 100644
index 0000000..d95e3f1
--- /dev/null
+++ b/test/typeref2.lm
@@ -0,0 +1,34 @@
+##### LM #####
+namespace n1
+
+ namespace n2
+ lex
+ token id / 'a' .. 'z' /
+ ignore / '\n' | '\t' | ' ' /
+ end
+
+ def start
+ [id*]
+ end
+end
+
+parse P: n1::n2::id+[stdin]
+print( P )
+##### IN #####
+
+##### EXP #####
+NIL--noeol
+##### IN #####
+a
+##### EXP #####
+a
+##### IN #####
+a
+ b
+ c
+d
+##### EXP #####
+a
+ b
+ c
+d
diff --git a/test/typeref3.lm b/test/typeref3.lm
new file mode 100644
index 0000000..dedff0a
--- /dev/null
+++ b/test/typeref3.lm
@@ -0,0 +1,27 @@
+##### LM #####
+namespace n1
+
+ namespace n2
+ lex
+ token id / 'a' .. 'z' /
+ ignore / '\n' | '\t' | ' ' /
+ end
+
+ def start
+ [id*]
+ end
+end
+
+parse P: n1::n2::id?[stdin]
+print( P )
+##### IN #####
+
+##### EXP #####
+##### IN #####
+a
+##### EXP #####
+a
+##### IN #####
+a b
+##### EXP #####
+NIL--noeol
diff --git a/test/undofrag1.lm b/test/undofrag1.lm
new file mode 100644
index 0000000..a997cb8
--- /dev/null
+++ b/test/undofrag1.lm
@@ -0,0 +1,67 @@
+##### LM #####
+
+lex
+ ignore /space+/
+ literal `# `{ `}
+ token id2 /[a-zA-Z_]+/
+end
+
+def item2
+ [id2]
+| [`{ item2* `}]
+
+def start2
+ [item2*]
+
+
+context ctx
+
+ SP: parser<start2>
+
+ lex
+ ignore /space+/
+ literal `* `( `) `!
+ token semi_nl /';\n'/
+ token id /[a-zA-Z_]+/
+ end
+
+ def item
+ [id]
+ | [`( item* `)]
+
+
+ def A [] {
+ print( 'A\n' )
+ send SP "{ A{d} }"
+ }
+
+ def B [] {
+ print( 'B\n' )
+ send SP "{ B{d} }"
+ }
+
+ def start1
+ [A item* `!]
+ | [B item* semi_nl]
+
+end # ctx
+
+
+CTX: ctx = cons ctx []
+CTX.SP = cons parser<start2> []
+send CTX.SP "a b{c}"
+
+parse Input: ctx::start1( CTX )[stdin]
+
+send CTX.SP "{e}f g"
+
+print( Input )
+print( CTX.SP() '\n' )
+
+##### IN #####
+a b c ( d ) e f g ;
+##### EXP #####
+A
+B
+a b c ( d ) e f g ;
+a b{c}{ B{d} }{e}f g
diff --git a/test/undofrag2.lm b/test/undofrag2.lm
new file mode 100644
index 0000000..bbade5e
--- /dev/null
+++ b/test/undofrag2.lm
@@ -0,0 +1,50 @@
+##### LM #####
+context undo
+
+ lex
+ ignore /( ' ' | '\t')+/
+ literal `* `( `) `^ `;
+ token NL /'\n'/
+ token id /[a-zA-Z_]+/
+ end
+
+ Out: parser<out>
+
+ def out_item
+ [id]
+ | [`( item* `)]
+
+ def out
+ [out_item*]
+
+ def item
+ [id]
+ {
+ send Out [r1]
+ }
+ | [`( item* `)]
+ {
+ send Out ['(']
+ send Out [r2]
+ send Out [')']
+ }
+
+ def A1 []
+ def A2 []
+
+ def start
+ [A1 item* `^]
+ | [A2 item* `; NL]
+
+end # undo
+
+cons Undo: undo[]
+Undo.Out = construct parser<undo::out> []
+
+parse Input: undo::start(Undo)[ stdin ]
+print( Input )
+
+##### IN #####
+a b c;
+##### EXP #####
+a b c;
diff --git a/test/undofrag3.lm b/test/undofrag3.lm
new file mode 100644
index 0000000..ed96cba
--- /dev/null
+++ b/test/undofrag3.lm
@@ -0,0 +1,56 @@
+##### LM #####
+context undo
+
+ lex
+ ignore /( ' ' | '\t' )+/
+ literal `* `( `) `^ `; `.
+ token NL /'\n'/
+ token id /[a-zA-Z_]+/
+ end
+
+ Out: parser<out>
+
+ def out_item
+ [id]
+ | [`( item* `)]
+
+ def out
+ [out_item*]
+
+ def item
+ [id]
+ {
+ send Out [r1]
+ }
+ | [`( item* `)]
+ {
+ send Out ['(']
+ send Out [r2]
+ send Out [')']
+ }
+
+ def A1 []
+ def A2 []
+
+ def F
+ []
+ {
+ print_xml( Out() )
+ }
+
+ def start
+ [A1 item* F `. `^]
+ | [A2 item* F `. `; NL]
+
+end # undo
+
+cons Undo: undo[]
+Undo.Out = construct parser<undo::out> []
+
+parse Input: undo::start(Undo)[ stdin ]
+print( Input )
+
+##### IN #####
+a . ;
+##### EXP #####
+<undo::out><undo::_repeat_out_item><undo::out_item><undo::id>a</undo::id></undo::out_item></undo::_repeat_out_item></undo::out><undo::out><undo::_repeat_out_item><undo::out_item><undo::id>a</undo::id></undo::out_item></undo::_repeat_out_item></undo::out>a . ;
diff --git a/test/while1.lm b/test/while1.lm
new file mode 100644
index 0000000..645c28d
--- /dev/null
+++ b/test/while1.lm
@@ -0,0 +1,52 @@
+##### LM #####
+while 0
+ print( '0\n' )
+
+global I: int = 3
+
+int f()
+{
+ I = I - 1
+ print( ' ' I )
+}
+
+# simple expr and stmt
+while I
+ f()
+print( '\n' )
+
+# compound stmt list
+I = 3
+while I
+{
+ I = I - 1
+ print( ' ' I )
+}
+print( '\n' )
+
+# paren expr
+I = 3
+while ( I )
+ f()
+print( '\n' )
+
+# expr with computation
+I = 3
+while ( I + 1 )
+ f()
+print( '\n' )
+
+# computation and stmt list
+I = 3
+while ( I + 2 )
+{
+ I = I - 1
+ print( ' ' I )
+}
+print( '\n' )
+##### EXP #####
+ 2 1 0
+ 2 1 0
+ 2 1 0
+ 2 1 0 -1
+ 2 1 0 -1 -2