summaryrefslogtreecommitdiff
path: root/contrib
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2014-10-13 19:14:30 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2014-10-13 19:14:30 +0000
commiteafd7a3974e8605fd02794269db6114a3446e016 (patch)
tree064737b35dbe10f2995753ead92f95bac30ba048 /contrib
downloadragel-tarball-eafd7a3974e8605fd02794269db6114a3446e016.tar.gz
ragel-6.9ragel-6.9
Diffstat (limited to 'contrib')
-rw-r--r--contrib/Makefile.am2
-rw-r--r--contrib/Makefile.in398
-rw-r--r--contrib/ragel.m453
-rw-r--r--contrib/ragel.make6
-rw-r--r--contrib/unicode2ragel.rb305
5 files changed, 764 insertions, 0 deletions
diff --git a/contrib/Makefile.am b/contrib/Makefile.am
new file mode 100644
index 0000000..7ef7e8d
--- /dev/null
+++ b/contrib/Makefile.am
@@ -0,0 +1,2 @@
+
+EXTRA_DIST = ragel.make ragel.m4 unicode2ragel.rb
diff --git a/contrib/Makefile.in b/contrib/Makefile.in
new file mode 100644
index 0000000..1dffb01
--- /dev/null
+++ b/contrib/Makefile.in
@@ -0,0 +1,398 @@
+# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+subdir = contrib
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.in
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/ragel/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EXEEXT = @EXEEXT@
+FIG2DEV = @FIG2DEV@
+GDC = @GDC@
+GMCS = @GMCS@
+GOBIN = @GOBIN@
+GOBJC = @GOBJC@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JAVAC = @JAVAC@
+KELBT = @KELBT@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PDFLATEX = @PDFLATEX@
+PUBDATE = @PUBDATE@
+RAGEL = @RAGEL@
+RANLIB = @RANLIB@
+RUBY = @RUBY@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TXL = @TXL@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build_alias = @build_alias@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host_alias = @host_alias@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+EXTRA_DIST = ragel.make ragel.m4 unicode2ragel.rb
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign contrib/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign contrib/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+tags TAGS:
+
+ctags CTAGS:
+
+cscope cscopelist:
+
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-generic cscopelist-am \
+ ctags-am distclean distclean-generic distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
+ pdf-am ps ps-am tags-am uninstall uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/contrib/ragel.m4 b/contrib/ragel.m4
new file mode 100644
index 0000000..72ce4b9
--- /dev/null
+++ b/contrib/ragel.m4
@@ -0,0 +1,53 @@
+dnl Check for presence of the Ragel State Machine generator.
+dnl
+dnl This macro checks for the presence of the ragel tool in the system,
+dnl and whether the ragel tool is absolutely needed for a complete
+dnl build.
+dnl
+dnl To check for the need for Ragel, you have to provide the relative
+dnl path of a source file generated through Ragel: if the file is
+dnl present in the source tree, a missing ragel command will not cause
+dnl the configure to abort.
+
+dnl _RAGEL_VARS
+dnl
+dnl Internal helper: registers the RAGEL and RAGELFLAGS variables through
+dnl AC_ARG_VAR together with their help descriptions.
+AC_DEFUN([_RAGEL_VARS], [
+ AC_ARG_VAR([RAGEL], [Ragel generator command])
+ AC_ARG_VAR([RAGELFLAGS], [Ragel generator flags])
+])
+
+dnl CHECK_RAGEL(generated-source, minimum-version)
+dnl
+dnl Looks for a ragel executable and stores the command in RAGEL.
+dnl RAGEL is set to "false" when no executable is found, or when a
+dnl minimum version was given and the detected version is lower.
+dnl When RAGEL ends up as "false", configure aborts unless the file
+dnl generated-source already exists below srcdir.
+dnl
+dnl NOTE: the version probe only recognises single-digit major.minor
+dnl version strings and compares them with the dot removed.
+AC_DEFUN([CHECK_RAGEL], [
+  AC_REQUIRE([_RAGEL_VARS])
+  AC_CHECK_PROG([RAGEL], [ragel], [ragel], [no])
+
+  dnl We set RAGEL to false so that it would execute the "false"
+  dnl command if needed.
+  AS_IF([test x"$RAGEL" = x"no"],
+    [RAGEL=false],
+    AS_IF([test x"$2" != "x"],
+      [ragel_version=`$RAGEL --version | sed -n -e '1s:.*version \(@<:@0-9@:>@\.@<:@0-9@:>@\) .*:\1:p'`
+       ragel_version_compare=`echo $ragel_version | tr -d .`
+       ragel_wanted_version=`echo $2 | tr -d .`
+       AS_IF([test $ragel_version_compare -lt $ragel_wanted_version],
+         [AC_MSG_WARN([Found Ragel $ragel_version but Ragel $2 requested])
+          RAGEL=false
+         ])
+      ]))
+
+  dnl Only test the need if not found
+  AS_IF([test x"$RAGEL" = x"false"], [
+    AC_MSG_CHECKING([whether we need ragel to regenerate sources])
+    dnl Use test -f here: the unary -a primary is not portable across shells.
+    AS_IF([test -f "${srcdir}/$1"], [ragel_needed=no], [ragel_needed=yes])
+    AC_MSG_RESULT([$ragel_needed])
+
+    AS_IF([test x"$ragel_needed" = x"yes"],
+      [AC_MSG_ERROR([dnl
+You need Ragel to build from development sources.
+You can find Ragel at http://www.complang.org/ragel/dnl
+      ])])
+  ])
+])
+
+dnl CHECK_RAGEL_AM(generated-source, minimum-version)
+dnl
+dnl Automake flavour of CHECK_RAGEL: forwards both arguments to CHECK_RAGEL
+dnl and then defines the HAVE_RAGEL conditional, which is true whenever
+dnl RAGEL was not replaced by "false".
+AC_DEFUN([CHECK_RAGEL_AM], [
+ CHECK_RAGEL([$1], [$2])
+
+ AM_CONDITIONAL([HAVE_RAGEL], [test x"$RAGEL" != x"false"])
+])
diff --git a/contrib/ragel.make b/contrib/ragel.make
new file mode 100644
index 0000000..f7a71b5
--- /dev/null
+++ b/contrib/ragel.make
@@ -0,0 +1,6 @@
+# -*- Makefile -*-
+#
+# Make fragment that teaches make how to turn a Ragel source (.rl) into a
+# C source (.c).  It expects RAGEL and RAGELFLAGS to be set by the
+# including Makefile.
+
+# Register the .rl suffix so the suffix rule below is recognised.
+SUFFIXES = .rl
+
+# Suffix rule: run $(RAGEL) with $(RAGELFLAGS) and -C on the .rl
+# prerequisite ($<) and write the output to the .c target ($@).
+.rl.c:
+ $(RAGEL) $(RAGELFLAGS) -C $< -o $@
diff --git a/contrib/unicode2ragel.rb b/contrib/unicode2ragel.rb
new file mode 100644
index 0000000..d64e601
--- /dev/null
+++ b/contrib/unicode2ragel.rb
@@ -0,0 +1,305 @@
+#!/usr/bin/env ruby
+#
+# This script uses the unicode spec to generate a Ragel state machine
+# that recognizes unicode alphanumeric characters. It generates 5
+# character classes: uupper, ulower, ualpha, udigit, and ualnum.
+# Currently supported encodings are UTF-8 [default] and UCS-4.
+#
+# Usage: unicode2ragel.rb [options]
+# -e, --encoding [ucs4 | utf8] Data encoding
+# -h, --help Show this message
+#
+# This script was originally written as part of the Ferret search
+# engine library.
+#
+# Author: Rakan El-Khalil <rakan@well.com>
+
+require 'optparse'
+require 'open-uri'
+
+# Encodings accepted by the -e/--encoding option.
+ENCODINGS = [ :utf8, :ucs4 ]
+# Ragel alphtype quoted in the generated file header for each encoding.
+ALPHTYPES = { :utf8 => "unsigned char", :ucs4 => "unsigned int" }
+# Unicode property table that the generated machines are derived from.
+CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"
+
+###
+# Display vars & default option
+#
+# TOTAL_WIDTH and RANGE_WIDTH drive the column layout of the generated
+# range/description lines; @encoding is the encoding used when -e is not
+# given on the command line.
+
+TOTAL_WIDTH = 80
+RANGE_WIDTH = 23
+@encoding = :utf8
+
+###
+# Option parsing
+#
+# The argument of -e is declared optional, so OptionParser may hand nil to
+# the block when the flag is given without a value.  Going through to_s
+# turns that into an empty symbol, which the membership check below
+# rejects with the usual "Invalid encoding" message instead of a
+# NoMethodError.
+
+cli_opts = OptionParser.new do |opts|
+  opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
+    @encoding = o.to_s.downcase.to_sym
+  end
+  opts.on("-h", "--help", "Show this message") do
+    puts opts
+    exit
+  end
+end
+
+cli_opts.parse(ARGV)
+unless ENCODINGS.member? @encoding
+  puts "Invalid encoding: #{@encoding}"
+  puts cli_opts
+  # Report the failure to the caller (make, shell scripts) via the status.
+  exit 1
+end
+
+##
+# Reads the document at url and, for every line that assigns +property+,
+# yields the codepoint range (a Range of Integers) and the description
+# text that follows the last '#' on that line.
+#
+# url      - location of a DerivedCoreProperties-style table
+# property - property name to select, e.g. "Alphabetic"
+#
+# Comment lines (starting with '#') and lines for other properties are
+# skipped.
+
+def each_alpha( url, property )
+  # Kernel#open no longer opens URLs on Ruby 3.0+; use URI.open where it
+  # exists and fall back to the open-uri patched Kernel.open otherwise.
+  opener = URI.respond_to?(:open) ? URI : Kernel
+
+  opener.open( url ) do |file|
+    file.each_line do |line|
+      next if line =~ /^#/
+      next if line !~ /; #{property} #/
+
+      range, description = line.split(/;/)
+      range = range.strip
+      # Non-bang calls: the bang variants return nil when nothing changes,
+      # which makes chaining them fragile.
+      description = description.sub(/.*#/, '').strip
+
+      if range =~ /\.\./
+        start, stop = range.split '..'
+      else
+        start = stop = range
+      end
+
+      yield(start.hex .. stop.hex, description)
+    end
+  end
+end
+
+###
+# Renders n as upper-case hexadecimal, prefixing a single "0" when the
+# digit count is odd so the result always describes whole bytes.
+
+def to_hex( n )
+  digits = format("%0X", n)
+  digits.length.odd? ? "0#{digits}" : digits
+end
+
+###
+# UCS-4 needs no transformation: the codepoints themselves are printed in
+# hex.  Returns a one-element array holding either "0xAA" (single
+# codepoint) or "0xAA..0xBB" (range).
+
+def to_ucs4( range )
+  first, last = range.begin, range.end
+  return [ "0x" + to_hex(first) ] if first == last
+
+  [ "0x" + to_hex(first) + "..0x" + to_hex(last) ]
+end
+
+##
+# 0x00 - 0x7f -> 0zzzzzzz[7]
+# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
+# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
+# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
+
+UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]
+
+# Encodes codepoint n as UTF-8 and returns the encoded bytes as one hex
+# string (two digits per byte).  Codepoints above 0x10ffff yield "00".
+def to_utf8_enc( n )
+  bytes =
+    if n <= 0x7f
+      [ n ]
+    elsif n <= 0x7ff
+      [ 0xc0 | (n >> 6),
+        0x80 | (n & 0x3f) ]
+    elsif n <= 0xffff
+      [ 0xe0 | (n >> 12),
+        0x80 | ((n >> 6) & 0x3f),
+        0x80 | (n & 0x3f) ]
+    elsif n <= 0x10ffff
+      [ 0xf0 | (n >> 18),
+        0x80 | ((n >> 12) & 0x3f),
+        0x80 | ((n >> 6) & 0x3f),
+        0x80 | (n & 0x3f) ]
+    else
+      [ 0 ]
+    end
+
+  # Pack the bytes big-endian into one integer before formatting.
+  to_hex(bytes.inject(0) { |packed, byte| (packed << 8) | byte })
+end
+
+# Decodes a UTF-8 sequence given as a hex string (e.g. "E282AC") back to
+# its codepoint.  The sequence length is inferred from the magnitude of
+# the packed value.  A lone lead or continuation byte (e.g. "BF") falls
+# into the two-byte branch and yields its low six payload bits, which is
+# what count_codepoints relies on when measuring per-byte ranges.
+def from_utf8_enc( n )
+  n = n.hex
+  r = 0
+  if n <= 0x7f
+    r = n
+  elsif n <= 0xdfff
+    y = (n >> 8) & 0x1f
+    z = n & 0x3f
+    r = y << 6 | z
+  elsif n <= 0xefffff
+    x = (n >> 16) & 0x0f
+    y = (n >> 8) & 0x3f
+    z = n & 0x3f
+    # The lead byte's 4 payload bits sit above two 6-bit continuation
+    # groups, hence a shift of 12 (not 10).
+    r = x << 12 | y << 6 | z
+  elsif n <= 0xf7ffffff
+    w = (n >> 24) & 0x07
+    x = (n >> 16) & 0x3f
+    y = (n >> 8) & 0x3f
+    z = n & 0x3f
+    r = w << 18 | x << 12 | y << 6 | z
+  end
+  r
+end
+
+###
+# Given a range of codepoints, splits it into sub-ranges whose members
+# all encode to the same number of UTF-8 bytes.
+# Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
+# The unicode 5.1 tables contain no range that straddles such a boundary,
+# so the splitting path is rarely exercised; it is kept for completeness.
+# Any part of the range above the last boundary (0x10ffff) is dropped.
+
+def utf8_ranges( range )
+  ranges = []
+  UTF8_BOUNDARIES.each do |max|
+    next if range.begin > max
+    return ranges << range if range.end <= max
+
+    # Parentheses are required: `<<` binds tighter than `..`, so without
+    # them Ruby would try to build a Range starting at the array.
+    ranges << (range.begin .. max)
+    range = (max + 1) .. range.end
+  end
+  ranges
+end
+
+# Builds the Ragel byte-sequence expressions covering every encoded value
+# between the hex strings start and stop (inclusive), working one byte
+# (two hex digits) at a time and recursing on the remaining bytes.
+#
+# Returns an array of strings; each string is a sequence of "0xNN " or
+# "0xNN..0xMM " tokens, one token per byte.
+# NOTE(review): assumes start and stop have the same, even, length --
+# confirm against callers.
+def build_range( start, stop )
+ # size: number of bytes left to process; left: bytes after the current one.
+ size = start.size/2
+ left = size - 1
+ return [""] if size < 1
+
+ # Leading byte of each bound, as two hex digits.
+ a = start[0..1]
+ b = stop[0..1]
+
+ ###
+ # Shared prefix
+
+ if a == b
+ return build_range(start[2..-1], stop[2..-1]).map do |elt|
+ "0x#{a} " + elt
+ end
+ end
+
+ ###
+ # Unshared prefix, end of run
+
+ return ["0x#{a}..0x#{b} "] if left.zero?
+
+ ###
+ # Unshared prefix, not end of run
+ # Range can be 0x123456..0x56789A
+ # Which is equivalent to:
+ # 0x123456 .. 0x12FFFF
+ # 0x130000 .. 0x55FFFF
+ # 0x560000 .. 0x56789A
+
+ ret = []
+ # First piece: from start up to the last value sharing start's lead byte.
+ ret << build_range(start, a + "FF" * left)
+
+ ###
+ # Only generate middle range if need be.
+
+ if a.hex+1 != b.hex
+ max = to_hex(b.hex - 1)
+ max = "FF" if b == "FF"
+ ret << "0x#{to_hex(a.hex+1)}..0x#{max} " + "0x00..0xFF " * left
+ end
+
+ ###
+ # Don't generate last range if it is covered by first range
+
+ ret << build_range(b + "00" * left, stop) unless b == "FF"
+ # ret holds nested arrays from the recursive calls; flatten to strings.
+ ret.flatten!
+end
+
+# Converts a codepoint range into the list of Ragel byte-sequence
+# expressions matching its UTF-8 encoding.  The range is first split at
+# UTF-8 length boundaries, then each piece is expanded by build_range.
+# Always returns an array (empty when the range yields no pieces);
+# flatten! would return nil in that case.
+def to_utf8( range )
+  utf8_ranges( range ).flat_map do |r|
+    build_range to_utf8_enc(r.begin), to_utf8_enc(r.end)
+  end
+end
+
+##
+# Counts how many codepoints one generated expression covers.  The
+# expression is a space-separated list of byte tokens; every "0xA..0xB"
+# token contributes the width of its span and the widths are multiplied.
+# Tokens without a range count as one.  In utf8 mode the span is measured
+# on the decoded payload bits, otherwise on the raw hex values.
+
+def count_codepoints( code )
+  code.split(' ').inject(1) do |product, token|
+    bounds = /0x(.+)\.\.0x(.+)/.match(token)
+    next product unless bounds
+
+    low, high = bounds[1], bounds[2]
+    span =
+      if @encoding == :utf8
+        from_utf8_enc(high) - from_utf8_enc(low)
+      else
+        high.hex - low.hex
+      end
+    product * (span + 1)
+  end
+end
+
+# Three-way sanity check for one table entry: the number of codepoints
+# advertised in the description ("[N]", or 1 when absent), the size of the
+# parsed range, and the number of codepoints covered by the generated
+# expressions must all agree.
+def is_valid?( range, desc, codes )
+  advertised = desc =~ /\[(\d+)\]/ ? $1.to_i : 1
+  expected = range.end - range.begin + 1
+  encoded = codes.inject(0) { |total, code| total + count_codepoints(code) }
+
+  encoded == advertised && encoded == expected
+end
+
+##
+# Generate the state machine to stdout
+#
+# name     - name of the Ragel machine to define
+# property - unicode property whose ranges make up the machine
+#
+# Every range of the property is encoded for the selected @encoding,
+# validated with is_valid?, and printed as one alternative per line with
+# the (possibly truncated) description as a trailing comment.
+# Raises a RuntimeError when a range fails validation.
+
+def generate_machine( name, property )
+ # First alternative is printed without a leading "|".
+ pipe = " "
+ puts " #{name} = "
+ each_alpha( CHART_URL, property ) do |range, desc|
+
+ codes = (@encoding == :ucs4) ? to_ucs4(range) : to_utf8(range)
+
+ raise "Invalid encoding of range #{range}: #{codes.inspect}" unless
+ is_valid? range, desc, codes
+
+ # Column width for the expressions: at least RANGE_WIDTH.
+ range_width = codes.map { |a| a.size }.max
+ range_width = RANGE_WIDTH if range_width < RANGE_WIDTH
+
+ # Remaining room for the description; shrinks when expressions are wide.
+ desc_width = TOTAL_WIDTH - RANGE_WIDTH - 11
+ desc_width -= (range_width - RANGE_WIDTH) if range_width > RANGE_WIDTH
+
+ if desc.size > desc_width
+ desc = desc[0..desc_width - 4] + "..."
+ end
+
+ codes.each_with_index do |r, idx|
+ # Only the first expression of a range carries the description.
+ desc = "" unless idx.zero?
+ code = "%-#{range_width}s" % r
+ puts " #{pipe} #{code} ##{desc}"
+ pipe = "|"
+ end
+ end
+ puts " ;"
+ puts ""
+end
+
+# Emit the Ragel file: a header comment, the three generated machines,
+# and the fixed udigit/ualnum definitions.  The header lists every machine
+# the file defines, including ulower and uupper.
+puts <<EOF
+# The following Ragel file was autogenerated with #{$0}
+# from: #{CHART_URL}
+#
+# It defines ualpha, ulower, uupper, udigit, ualnum.
+#
+# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
+# and that your input is in #{@encoding}.
+
+%%{
+ machine WChar;
+EOF
+generate_machine( :ualpha, "Alphabetic" )
+generate_machine( :ulower, "Lowercase" )
+generate_machine( :uupper, "Uppercase" )
+puts <<EOF
+ udigit = '0'..'9';
+ ualnum = ualpha | udigit;
+}%%
+EOF