unifying some of the top-level components

including vim syntax, CREDITS, COPYING, examples, contrib.
author: Adrian Thurston <thurston@colm.net> 2019-09-11 18:22:31 -0600
committer: Adrian Thurston <thurston@colm.net> 2019-09-11 18:22:31 -0600
commit: e4f23077edf61818128b355f2aab2b900702ea97 (patch)
tree: 05d3294062b259a3d72e277950e1364c50bbea07 /examples
parent: bccaa853593339c2bac8ddede25f18e1afc91597 (diff)
download: colm-e4f23077edf61818128b355f2aab2b900702ea97.tar.gz
27 files changed, 3270 insertions, 0 deletions
diff --git a/examples/.gitignore b/examples/.gitignore
new file mode 100644
index 00000000..b309591b
--- /dev/null
+++ b/examples/.gitignore
@@ -0,0 +1,39 @@
+/Makefile.in
+/Makefile
+/concurrent
+/concurrent.exe
+/rlscan
+/rlscan.exe
+/clang
+/clang.exe
+/statechart
+/statechart.exe
+/gotocallret
+/gotocallret.exe
+/pullscan
+/pullscan.exe
+/cppscan
+/cppscan.exe
+/format
+/format.exe
+/awkemu
+/awkemu.exe
+/mailbox
+/mailbox.exe
+/atoi
+/atoi.exe
+/params
+/params.exe
+/statechart.cpp
+/gotocallret.cpp
+/clang.c
+/cppscan.cpp
+/mailbox.cpp
+/atoi.cpp
+/pullscan.c
+/concurrent.cpp
+/rlscan.cpp
+/params.c
+/format.c
+/awkemu.c
+/.deps
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 00000000..6ff75544
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,11 @@
+foreach(_example atoi awkemu clang concurrent cppscan format gotocallret
+		mailbox params rlscan statechart pullscan)
+	add_custom_command(
+		OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${_example}.cpp"
+		DEPENDS ${_example}.rl
+		COMMAND ragel
+		ARGS -G2 -o "${CMAKE_CURRENT_BINARY_DIR}/${_example}.cpp"
+			"${CMAKE_CURRENT_LIST_DIR}/${_example}.rl"
+		WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}")
+	add_executable(${_example} "${CMAKE_CURRENT_BINARY_DIR}/${_example}.cpp")
+endforeach()
diff --git a/examples/Makefile.am b/examples/Makefile.am
new file mode 100644
index 00000000..7cda0171
--- /dev/null
+++ b/examples/Makefile.am
@@ -0,0 +1,72 @@
+
+RAGEL = ../src/ragel
+FLEX = flex
+RE2C = re2c
+
+noinst_PROGRAMS = \
+	atoi concurrent cppscan format gotocallret mailbox params \
+	statechart 
+
+EXTRA_DIST = \
+	gotocallret.rl pullscan.rl concurrent.rl rlscan.rl statechart.rl \
+	params.rl clang.rl cppscan.rl format.rl awkemu.rl mailbox.rl atoi.rl
+
+gotocallret_SOURCES = gotocallret.cpp
+pullscan_SOURCES = pullscan.c
+concurrent_SOURCES = concurrent.cpp
+rlscan_SOURCES = rlscan.cpp
+statechart_SOURCES = statechart.cpp 
+params_SOURCES = params.c
+clang_SOURCES = clang.c 
+cppscan_SOURCES = cppscan.cpp 
+format_SOURCES = format.c
+awkemu_SOURCES = awkemu.c
+mailbox_SOURCES = mailbox.cpp
+atoi_SOURCES = atoi.cpp
+
+gotocallret.cpp: gotocallret.rl
+	$(RAGEL) -G2 -o gotocallret.cpp gotocallret.rl 
+
+pullscan.c: pullscan.rl $(RAGEL) 
+	$(RAGEL) -G2 -o $@ pullscan.rl
+
+concurrent.cpp: concurrent.rl $(RAGEL)
+	$(RAGEL) -G2 -o concurrent.cpp concurrent.rl
+
+rlscan.cpp: rlscan.rl 
+	$(RAGEL) -G2 -o rlscan.cpp rlscan.rl
+
+statechart.cpp: statechart.rl 
+	$(RAGEL) -G2 -o statechart.cpp statechart.rl
+
+params.c: params.rl
+	$(RAGEL) -G2 -o params.c params.rl
+
+clang.c: clang.rl 
+	$(RAGEL) -G2 -o clang.c clang.rl
+
+cppscan.cpp: cppscan.rl 
+	$(RAGEL) -G2 -o $@ cppscan.rl
+
+format.c: format.rl
+	$(RAGEL) -G2 -o format.c format.rl
+
+awkemu.c: awkemu.rl
+	$(RAGEL) -G2 -o awkemu.c awkemu.rl
+
+mailbox.cpp: mailbox.rl
+	$(RAGEL) -G2 -o mailbox.cpp mailbox.rl
+
+atoi.cpp: atoi.rl
+	$(RAGEL) -G2 -o atoi.cpp atoi.rl
+
+###
+
+lex-cppscan.cpp: cppscan.lex
+	$(FLEX) -f -o $@ $<
+
+re2c-cppscan.cpp: cppscan.rec
+	$(RE2C) -s $< > $@
+
+example.cpp: example.rec
+	$(RE2C) -s $< > $@
diff --git a/examples/README b/examples/README
new file mode 100644
index 00000000..12773cb3
--- /dev/null
+++ b/examples/README
@@ -0,0 +1,40 @@
+
+                    Ragel State Machine Compiler -- Examples
+                    ========================================
+
+atoi        -- Converts a string to an integer.
+
+awkemu      -- Performs the basic parsing that the awk program performs on input.
+               The awk equivalent to awkemu is in awkequiv.awk
+
+clang       -- A scanner for a simple C like language. It breaks input up into
+               words, numbers, strings and symbols and strips out whitespace
+               and comments. It is a suitable template for writing a parser
+               that finds a sequence of tokens.
+
+concurrent  -- Demonstrates the ability of ragel to produce parsers that
+               perform independent tasks concurrently.
+
+cppscan     -- A C++ scanner that uses the longest match scanning method. This
+               example differs from other examples of scanning. Each run of the
+               state machine matches one token. This method results in a
+               smaller state machine since the final kleene star is omitted and
+               therefore every state does not need to get all the transitions
+               of the start state.
+
+format      -- Partial printf implementation.
+
+gotocallret -- Demonstrate the use of fgoto, fcall and fret.
+
+mailbox     -- Parses unix mailbox files. It breaks files into messages, and
+               messages into headers and body. It demonstrates Ragel's ability
+               to make parsers for structured file formats.
+
+params      -- Parses command line arguments.
+
+rlscan      -- Lexes Ragel input files.
+
+statechart  -- Demonstrate the use of labels, the epsilon operator, and the
+               join operator for creating machines using the named state and
+               transition list paradigm.  This implements the same machine as
+               the atoi example.
diff --git a/examples/atoi.rl b/examples/atoi.rl
new file mode 100644
index 00000000..7164b68d
--- /dev/null
+++ b/examples/atoi.rl
@@ -0,0 +1,59 @@
+/*
+ * Convert a string to an integer.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+%%{
+	machine atoi;
+	write data;
+}%%
+
+long long atoi( char *str )
+{
+	char *p = str, *pe = str + strlen( str );	/* Cursor and end of input. */
+	int cs;						/* Current state of the machine. */
+	long long val = 0;				/* Magnitude built up digit by digit. */
+	bool neg = false;				/* Set when a leading '-' is seen. */
+	/* Parse an optionally signed decimal number terminated by a newline. */
+	%%{
+		action see_neg {
+			neg = true;
+		}
+
+		action add_digit {
+			val = val * 10 + (fc - '0');
+		}
+
+		main :=
+			( '-'@see_neg | '+' )? ( digit @add_digit )+
+			'\n';
+
+		# Initialize and execute.
+		write init;
+		write exec;
+	}%%
+
+	if ( neg )
+		val = -1 * val;
+	/* A state below atoi_first_final is not final: the input did not match. */
+	if ( cs < atoi_first_final )
+		fprintf( stderr, "atoi: there was an error\n" );
+
+	return val;
+}
+
+
+#define BUFSIZE 1024
+
+int main()
+{
+	char buf[BUFSIZE];
+	while ( fgets( buf, sizeof(buf), stdin ) != 0 ) {
+		long long value = atoi( buf );
+		printf( "%lld\n", value );
+	}
+	return 0;
+}
diff --git a/examples/awkemu.rl b/examples/awkemu.rl
new file mode 100644
index 00000000..6615943d
--- /dev/null
+++ b/examples/awkemu.rl
@@ -0,0 +1,116 @@
+/*
+ * Perform the basic line parsing of input performed by awk.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+%%{
+	machine awkemu;
+
+	action start_word {
+		if ( nwords < MAXWORDS ) ws[nwords] = fpc;	/* Words past MAXWORDS are dropped. */
+	}
+
+	action end_word {
+		if ( nwords < MAXWORDS ) we[nwords++] = fpc;	/* Keeps ws[]/we[] writes in bounds. */
+	}
+
+	action start_line {
+		nwords = 0;
+		ls = fpc;
+	}
+
+	action end_line {
+		printf("endline(%i): ", nwords );
+		fwrite( ls, 1, p - ls, stdout );
+		printf("\n");
+
+		for ( i = 0; i < nwords; i++ ) {
+			printf("  word: ");
+			fwrite( ws[i], 1, we[i] - ws[i], stdout );
+			printf("\n");
+		}
+	}
+
+	# Words in a line.
+	word = ^[ \t\n]+;
+
+	# The whitespace separating words in a line.
+	whitespace = [ \t];
+
+	# The components in a line to break up. Either a word or a single char of
+	# whitespace. On the word capture characters.
+	blineElements = word >start_word %end_word | whitespace;
+
+	# Star the break line elements. Just be careful to decrement the leaving
+	# priority as we don't want multiple character identifiers to be treated as
+	# multiple single char identifiers.
+	line = ( blineElements** '\n' ) >start_line @end_line;
+
+	# Any number of lines.
+	main := line*;
+}%%
+
+%% write data noerror nofinal;
+
+#define MAXWORDS 256
+#define BUFSIZE 4096
+char buf[BUFSIZE];
+
+int main()
+{
+	int i, nwords = 0;		/* Loop index and word count for the current line. */
+	char *ls = 0;			/* Start of the current line. */
+	char *ws[MAXWORDS];		/* Start of each word on the current line. */
+	char *we[MAXWORDS];		/* One past the end of each word. */
+
+	int cs;				/* Current state of the machine. */
+	int have = 0;			/* Bytes of an unfinished line carried over in buf. */
+
+	%% write init;
+	/* Read stdin in chunks; run the machine over the complete lines of each chunk. */
+	while ( 1 ) {
+		char *p, *pe, *data = buf + have;
+		int len, space = BUFSIZE - have;
+		/* fprintf( stderr, "space: %i\n", space ); */
+
+		if ( space == 0 ) {
+			fprintf(stderr, "buffer out of space\n");
+			exit(1);
+		}
+
+		len = fread( data, 1, space, stdin );
+		/* fprintf( stderr, "len: %i\n", len ); */
+		if ( len == 0 )
+			break;
+
+		/* Find the last newline by searching backwards. This is where
+		 * we will stop processing on this iteration. */
+		p = buf;
+		pe = buf + have + len - 1;
+		while ( pe >= buf && *pe != '\n' )	/* Bounds check first: never read buf[-1]. */
+			pe--;
+		pe += 1;
+
+		/* fprintf( stderr, "running on: %i\n", pe - p ); */
+
+		%% write exec;
+
+		/* How much is still in the buffer. */
+		have = data + len - pe;
+		if ( have > 0 )
+			memmove( buf, pe, have );
+
+		/* fprintf(stderr, "have: %i\n", have ); */
+
+		if ( len < space )
+			break;
+	}
+
+	if ( have > 0 )
+		fprintf(stderr, "input not newline terminated\n");
+	return 0;
+}
diff --git a/examples/awkequiv.awk b/examples/awkequiv.awk
new file mode 100755
index 00000000..9877dd36
--- /dev/null
+++ b/examples/awkequiv.awk
@@ -0,0 +1,10 @@
+#!/usr/bin/awk -f
+#
+
+
+{
+	print "endline(" NF "): " $0
+	for ( i = 1; i <= NF; i++ ) {
+		print "  word: " $i
+	}
+}
diff --git a/examples/clang.rl b/examples/clang.rl
new file mode 100644
index 00000000..60491e5e
--- /dev/null
+++ b/examples/clang.rl
@@ -0,0 +1,150 @@
+/*
+ * A mini C-like language scanner.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+%%{
+	machine clang;
+
+	newline = '\n' @{curline += 1;};
+	any_count_line = any | newline;
+
+	# Consume a C comment.
+	c_comment := any_count_line* :>> '*/' @{fgoto main;};
+
+	main := |*
+
+	# Alphanumeric characters or underscore.
+	alnum_u = alnum | '_';
+
+	# Alpha characters or underscore.
+	alpha_u = alpha | '_';
+
+	# Symbols. Upon entering clear the buffer. On all transitions
+	# buffer a character. Upon leaving dump the symbol.
+	( punct - [_'"] ) {
+		printf( "symbol(%i): %c\n", curline, ts[0] );
+	};
+
+	# Identifier. Upon entering clear the buffer. On all transitions
+	# buffer a character. Upon leaving, dump the identifier.
+	alpha_u alnum_u* {
+		printf( "ident(%i): ", curline );
+		fwrite( ts, 1, te-ts, stdout );
+		printf("\n");
+	};
+
+	# Single Quote.
+	sliteralChar = [^'\\] | newline | ( '\\' . any_count_line );
+	'\'' . sliteralChar* . '\'' {
+		printf( "single_lit(%i): ", curline );
+		fwrite( ts, 1, te-ts, stdout );
+		printf("\n");
+	};
+
+	# Double Quote.
+	dliteralChar = [^"\\] | newline | ( '\\' any_count_line );
+	'"' . dliteralChar* . '"' {
+		printf( "double_lit(%i): ", curline );
+		fwrite( ts, 1, te-ts, stdout );
+		printf("\n");
+	};
+
+	# Whitespace is standard ws, newlines and control codes.
+	any_count_line - 0x21..0x7e;
+
+	# Describe both c style comments and c++ style comments. The
+	# priority bump on the terminator of the comments brings us
+	# out of the extend* which matches everything.
+	'//' [^\n]* newline;
+
+	'/*' { fgoto c_comment; };
+
+	# Match an integer. We don't bother clearing the buf or filling it.
+	# The float machine overlaps with int and it will do it.
+	digit+ {
+		printf( "int(%i): ", curline );
+		fwrite( ts, 1, te-ts, stdout );
+		printf("\n");
+	};
+
+	# Match a float. Upon entering the machine clear the buf, buffer
+	# characters on every trans and dump the float upon leaving.
+	digit+ '.' digit+ {
+		printf( "float(%i): ", curline );
+		fwrite( ts, 1, te-ts, stdout );
+		printf("\n");
+	};
+
+	# Match a hex. Upon entering the hex part, clear the buf, buffer characters
+	# on every trans and dump the hex on leaving transitions.
+	'0x' xdigit+ {
+		printf( "hex(%i): ", curline );
+		fwrite( ts, 1, te-ts, stdout );
+		printf("\n");
+	};
+
+	*|;
+}%%
+
+%% write data nofinal;
+
+#define BUFSIZE 128
+
+void scanner()
+{
+	static char buf[BUFSIZE];
+	int cs, act, have = 0, curline = 1;
+	char *ts, *te = 0;
+	int done = 0;
+
+	%% write init;
+
+	while ( !done ) {
+		char *p = buf + have, *pe, *eof = 0;
+		int len, space = BUFSIZE - have;
+		
+		if ( space == 0 ) {
+			/* We've used up the entire buffer storing an already-parsed token
+			 * prefix that must be preserved. */
+			fprintf(stderr, "OUT OF BUFFER SPACE\n" );
+			exit(1);
+		}
+
+		len = fread( p, 1, space, stdin );
+		pe = p + len;
+
+		/* Check if this is the end of file. */
+		if ( len < space ) {
+			eof = pe;
+			done = 1;
+		}
+			
+		%% write exec;
+
+		if ( cs == clang_error ) {
+			fprintf(stderr, "PARSE ERROR\n" );
+			break;
+		}
+
+		if ( ts == 0 )
+			have = 0;
+		else {
+			/* There is a prefix to preserve, shift it over. */
+			have = pe - ts;
+			memmove( buf, ts, have );
+			te = buf + (te-ts);
+			ts = buf;
+		}
+	}
+}
+
+int main()
+{
+	scanner();
+	return 0;
+}
+
diff --git a/examples/concurrent.rl b/examples/concurrent.rl
new file mode 100644
index 00000000..224f9601
--- /dev/null
+++ b/examples/concurrent.rl
@@ -0,0 +1,126 @@
+/*
+ * Show off concurrent abilities.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace std;
+
+#define BUFSIZE 2048
+
+struct Concurrent
+{
+	int cur_char;
+	int start_word;
+	int start_comment;
+	int start_literal;
+
+	int cs;
+
+	int init( );
+	int execute( const char *data, int len, bool isEof );
+	int finish( );
+};
+
+%%{
+	machine Concurrent;
+
+	action next_char {
+		cur_char += 1;
+	}
+
+	action start_word {
+		start_word = cur_char;
+	}
+	action end_word {
+		cout << "word: " << start_word << 
+				" " << cur_char-1 << endl;
+	}
+
+	action start_comment {
+		start_comment = cur_char;
+	}
+	action end_comment {
+		cout << "comment: " << start_comment <<
+				" " << cur_char-1 << endl;
+	}
+
+	action start_literal {
+		start_literal = cur_char;
+	}
+	action end_literal {
+		cout << "literal: " << start_literal <<
+				" " << cur_char-1 << endl;
+	}
+
+	# Count characters.
+	chars = ( any @next_char )*;
+
+	# Words are non-whitespace. 
+	word = ( any-space )+ >start_word %end_word;
+	words = ( ( word | space ) $1 %0 )*;
+
+	# Finds C style comments. 
+	comment = ( '/*' any* :>> '*/' ) >start_comment %end_comment;
+	comments = ( comment | any )**;
+
+	# Finds single quoted strings. 
+	literalChar = ( any - ['\\] ) | ( '\\' . any );
+	literal = ('\'' literalChar* '\'' ) >start_literal %end_literal;
+	literals = ( ( literal | (any-'\'') ) $1 %0 )*;
+
+	main := chars | words | comments | literals;
+}%%
+
+%% write data;
+
+int Concurrent::init( )
+{
+	%% write init;
+	cur_char = 0;
+	return 1;
+}
+
+int Concurrent::execute( const char *data, int len, bool isEof )
+{
+	const char *p = data;
+	const char *pe = data + len;
+	const char *eof = isEof ? pe : 0;
+
+	%% write exec;
+
+	if ( cs == Concurrent_error )
+		return -1;
+	if ( cs >= Concurrent_first_final )
+		return 1;
+	return 0;
+}
+
+int Concurrent::finish( )
+{
+	if ( cs == Concurrent_error )
+		return -1;
+	if ( cs >= Concurrent_first_final )
+		return 1;
+	return 0;
+}
+
+Concurrent concurrent;
+char buf[BUFSIZE];
+
+int main()
+{
+	concurrent.init();
+	while ( 1 ) {
+		int len = fread( buf, 1, BUFSIZE, stdin );
+		concurrent.execute( buf, len, len != BUFSIZE );
+		if ( len != BUFSIZE )
+			break;
+	}
+
+	if ( concurrent.finish() <= 0 )
+		cerr << "concurrent: error parsing input" << endl;
+	return 0;
+}
diff --git a/examples/cppscan.lex b/examples/cppscan.lex
new file mode 100644
index 00000000..fb662538
--- /dev/null
+++ b/examples/cppscan.lex
@@ -0,0 +1,143 @@
+/*
+ * flex equivalent to cppscan.rl
+ */
+
+%{
+
+#include <stdio.h>
+
+#define TK_Dlit 256
+#define TK_Slit 257
+#define TK_Float 258
+#define TK_Id 259
+#define TK_NameSep 260
+#define TK_Arrow 261
+#define TK_PlusPlus 262
+#define TK_MinusMinus 263
+#define TK_ArrowStar 264
+#define TK_DotStar 265
+#define TK_ShiftLeft 266
+#define TK_ShiftRight 267
+#define TK_IntegerDecimal 268
+#define TK_IntegerOctal 269
+#define TK_IntegerHex 270
+#define TK_EqualsEquals 271
+#define TK_NotEquals 272
+#define TK_AndAnd 273
+#define TK_OrOr 274
+#define TK_MultAssign 275
+#define TK_DivAssign 276
+#define TK_PercentAssign 277
+#define TK_PlusAssign 278
+#define TK_MinusAssign 279
+#define TK_AmpAssign 280
+#define TK_CaretAssign 281
+#define TK_BarAssign 282
+#define TK_DotDotDot 283
+#define TK_Whitespace 284
+#define TK_Comment 285
+
+int line = 1, col = 1;
+
+void token( int tok, char *data, int len )
+{
+	printf( "<%i> ", tok );
+	for ( int i = 0; i < len; i++ )
+		fputc( data[i], stdout );
+	fputc( '\n', stdout );
+
+	/* Count newlines and columns. This code is here mainly for having some
+	 * code in the token routine when commenting out the above output during
+	 * performance testing. */
+	for ( int i = 0; i < len; i ++ ) {
+		if ( data[i] == '\n' ) {
+			line += 1;
+			col = 1;
+		}
+		else {
+			col += 1;
+		}
+	}
+}
+
+
+%}
+
+%x COMMENT
+
+FRACT_CONST		[0-9]*\.[0-9]+|[0-9]+\.
+EXPONENT		[eE][+\-]?[0-9]+
+FLOAT_SUFFIX	[flFL]
+
+%%
+
+	/* Single and double literals. */
+L?\'([^\'\\\n]|\\.)*\' {
+	token( TK_Slit, yytext, yyleng );
+}
+
+L?\"([^\"\\\n]|\\.)*\" {
+	token( TK_Dlit, yytext, yyleng );
+}
+
+[a-zA-Z_][a-zA-Z0-9_]* {
+	token( TK_Id, yytext, yyleng );
+}
+
+{FRACT_CONST}{EXPONENT}?{FLOAT_SUFFIX}?|[0-9]+{EXPONENT}{FLOAT_SUFFIX}? {
+	token( TK_Float, yytext, yyleng );
+}
+
+(0|[1-9][0-9]*)[ulUL]{0,3} {
+	token( TK_IntegerDecimal, yytext, yyleng );
+}
+
+0[0-9]+[ulUL]{0,2} {
+	token( TK_IntegerOctal, yytext, yyleng );
+}
+
+0x[0-9a-fA-F]+[ulUL]{0,2} {
+	token( TK_IntegerHex, yytext, yyleng );
+}
+
+:: token( TK_NameSep, yytext, yyleng );
+== token( TK_EqualsEquals, yytext, yyleng );
+!= token( TK_NotEquals, yytext, yyleng );
+&& token( TK_AndAnd, yytext, yyleng );
+\|\| token( TK_OrOr, yytext, yyleng );
+\*= token( TK_MultAssign, yytext, yyleng );
+\/= token( TK_DivAssign, yytext, yyleng );
+%= token( TK_PercentAssign, yytext, yyleng );
+\+= token( TK_PlusAssign, yytext, yyleng );
+-= token( TK_MinusAssign, yytext, yyleng );
+&= token( TK_AmpAssign, yytext, yyleng );
+\^= token( TK_CaretAssign, yytext, yyleng );	/* '^' escaped: a leading bare ^ is the start-of-line anchor. */
+\|= token( TK_BarAssign, yytext, yyleng );
+\+\+ token( TK_PlusPlus, yytext, yyleng );
+-- token( TK_MinusMinus, yytext, yyleng );
+-> token( TK_Arrow, yytext, yyleng );
+->\* token( TK_ArrowStar, yytext, yyleng );
+\.\* token( TK_DotStar, yytext, yyleng );
+\.\.\. token( TK_DotDotDot, yytext, yyleng );
+
+\/\*				BEGIN(COMMENT);
+<COMMENT>\*\/		BEGIN(INITIAL);
+<COMMENT>(.|\n)		{ }
+
+\/\/.*\n			{}
+[^!-~]+				{}
+
+[!-/:-@\[-`{-~] token( yytext[0], yytext, yyleng );
+	
+%%
+
+int yywrap()
+{
+	/* Once the input is done, no more. */
+	return 1;
+}
+
+int main()
+{
+	yylex();
+}
diff --git a/examples/cppscan.rec b/examples/cppscan.rec
new file mode 100644
index 00000000..43f297d8
--- /dev/null
+++ b/examples/cppscan.rec
@@ -0,0 +1,183 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define TK_Dlit 256
+#define TK_Slit 257
+#define TK_Float 258
+#define TK_Id 259
+#define TK_NameSep 260
+#define TK_Arrow 261
+#define TK_PlusPlus 262
+#define TK_MinusMinus 263
+#define TK_ArrowStar 264
+#define TK_DotStar 265
+#define TK_ShiftLeft 266
+#define TK_ShiftRight 267
+#define TK_IntegerDecimal 268
+#define TK_IntegerOctal 269
+#define TK_IntegerHex 270
+#define TK_EqualsEquals 271
+#define TK_NotEquals 272
+#define TK_AndAnd 273
+#define TK_OrOr 274
+#define TK_MultAssign 275
+#define TK_DivAssign 276
+#define TK_PercentAssign 277
+#define TK_PlusAssign 278
+#define TK_MinusAssign 279
+#define TK_AmpAssign 280
+#define TK_CaretAssign 281
+#define TK_BarAssign 282
+#define TK_DotDotDot 283
+#define TK_Whitespace 284
+#define TK_Comment 285
+
+int line = 1, col = 1;
+
+void token( int tok, char *data, int len )
+{
+	printf( "<%i> ", tok );
+	for ( int i = 0; i < len; i++ )
+		fputc( data[i], stdout );
+	fputc( '\n', stdout );
+
+	/* Count newlines and columns. This code is here mainly for having some
+	 * code in the token routine when commenting out the above output during
+	 * performance testing. */
+	for ( int i = 0; i < len; i ++ ) {
+		if ( data[i] == '\n' ) {
+			line += 1;
+			col = 1;
+		}
+		else {
+			col += 1;
+		}
+	}
+}
+
+#define BUFSIZE 8192
+char buf[BUFSIZE];
+
+void fill( int n )
+{
+	printf("fill(%i)\n", n);
+	exit(1);
+}
+
+int main()
+{
+	char *start, *p = buf, *lim = buf, *marker;
+	int len, have, want, shift;
+	int done = 0;
+	
+#define YYCTYPE         char
+
+#define YYCURSOR        p
+#define YYLIMIT         lim
+#define YYMARKER        marker
+
+#define YYFILL(n)      { \
+		if ( ! done ) { \
+			have = lim-start; \
+			if ( start > buf ) { \
+				shift = start-buf; \
+				memmove( buf, start, have ); \
+				start -= shift; \
+				p -= shift; \
+				lim -= shift; \
+				marker -= shift; \
+			} \
+			want = BUFSIZE - have - 1; \
+			len = fread( lim, 1, want, stdin ); \
+			lim += len; \
+			if ( len < want ) { \
+				*lim++ = 0; \
+				done = 1; \
+			} \
+		} \
+	}
+
+again:
+	start = p;
+
+/*!re2c
+
+ANY				= [\000-\377];
+FRACTCONST		= ( [0-9]* "." [0-9]+ ) | [0-9]+ ".";
+EXPONENT		= [eE] [+\-]? [0-9]+;
+FLOATSUFFIX		= [flFL];
+
+	"L"? "\'" ( ANY \ [\'\\\n] | "\\" ANY )* "\'" {
+		token( TK_Slit, start, p-start );
+		goto again;
+	}
+
+	"L"? "\"" ( ANY \ [\"\\\n] | "\\" ANY )* "\"" {
+		token( TK_Dlit, start, p-start );
+		goto again;
+	}
+
+	[a-zA-Z_][a-zA-Z0-9_]* {
+		token( TK_Id, start, p-start );
+		goto again;
+	}
+
+	( FRACTCONST EXPONENT? FLOATSUFFIX? ) | ( [0-9]+ EXPONENT FLOATSUFFIX? ) {
+		token( TK_Float, start, p-start );
+		goto again;
+	}
+
+
+	( "0" | [1-9][0-9]* ) [ulUL]* {
+		token( TK_IntegerDecimal, start, p-start );
+		goto again;
+	}
+
+	"0" [0-9]+ [ulUL]* {
+		token( TK_IntegerOctal, start, p-start );
+		goto again;
+	}
+
+	"0x" [0-9a-fA-F]+[ulUL]* {
+		token( TK_IntegerHex, start, p-start );
+		goto again;
+	}
+
+	"::"	{ token( TK_NameSep, start, p-start ); goto again; }
+	"=="	{ token( TK_EqualsEquals, start, p-start ); goto again; }
+	"!="	{ token( TK_NotEquals, start, p-start ); goto again; }
+	"&&"	{ token( TK_AndAnd, start, p-start ); goto again; }
+	"||"	{ token( TK_OrOr, start, p-start ); goto again; }
+	"*="	{ token( TK_MultAssign, start, p-start ); goto again; }
+	"/="	{ token( TK_DivAssign, start, p-start ); goto again; }
+	"%="	{ token( TK_PercentAssign, start, p-start ); goto again; }
+	"+="	{ token( TK_PlusAssign, start, p-start ); goto again; }
+	"-="	{ token( TK_MinusAssign, start, p-start ); goto again; }
+	"&="	{ token( TK_AmpAssign, start, p-start ); goto again; }
+	"^="	{ token( TK_CaretAssign, start, p-start ); goto again; }
+	"|="	{ token( TK_BarAssign, start, p-start ); goto again; }
+	"++"	{ token( TK_PlusPlus, start, p-start ); goto again; }
+	"--"	{ token( TK_MinusMinus, start, p-start ); goto again; }
+	"->"	{ token( TK_Arrow, start, p-start ); goto again; }
+	"->*"	{ token( TK_ArrowStar, start, p-start ); goto again; }
+	".*"	{ token( TK_DotStar, start, p-start ); goto again; }
+	"..."	{ token( TK_DotDotDot, start, p-start ); goto again; }
+
+	"/*" { goto comment; }
+	"//" (ANY\"\n")* "\n"	{ goto again; }
+	[\001-\040\177]+		{ goto again; }
+
+	[\041-\057\072-\100\133-\140\173-\176]	{
+		token( *start, start, p-start );
+		goto again;
+	}
+	"\000"					{ return 0; }
+*/
+
+comment:
+/*!re2c
+	"*/"					{ goto again; }
+	ANY						{ goto comment; }
+*/
+}
diff --git a/examples/cppscan.rl b/examples/cppscan.rl
new file mode 100644
index 00000000..1ead5aa6
--- /dev/null
+++ b/examples/cppscan.rl
@@ -0,0 +1,208 @@
+/*
+ * A C++ scanner. Uses the longest match construction.
+ * << <= <<= >> >= >>= are left out since angle brackets are used in templates.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <iostream>
+
+#define TK_Dlit 256
+#define TK_Slit 257
+#define TK_Float 258
+#define TK_Id 259
+#define TK_NameSep 260
+#define TK_Arrow 261
+#define TK_PlusPlus 262
+#define TK_MinusMinus 263
+#define TK_ArrowStar 264
+#define TK_DotStar 265
+#define TK_ShiftLeft 266
+#define TK_ShiftRight 267
+#define TK_IntegerDecimal 268
+#define TK_IntegerOctal 269
+#define TK_IntegerHex 270
+#define TK_EqualsEquals 271
+#define TK_NotEquals 272
+#define TK_AndAnd 273
+#define TK_OrOr 274
+#define TK_MultAssign 275
+#define TK_DivAssign 276
+#define TK_PercentAssign 277
+#define TK_PlusAssign 278
+#define TK_MinusAssign 279
+#define TK_AmpAssign 280
+#define TK_CaretAssign 281
+#define TK_BarAssign 282
+#define TK_DotDotDot 283
+#define TK_Whitespace 284
+#define TK_Comment 285
+
+#define BUFSIZE 16384
+
+/* EOF char used to flush out that last token. This should be a whitespace
+ * token. */
+
+#define LAST_CHAR 0
+
+using std::cerr;
+using std::cout;
+using std::cin;
+using std::endl;
+
+static char buf[BUFSIZE];
+static int line = 1, col = 1;
+static char *ts, *te;
+static int act, have = 0;
+static int cs;
+
+%%{
+	machine Scanner; 
+	write data nofinal;
+
+	# Floating literals.
+	fract_const = digit* '.' digit+ | digit+ '.';
+	exponent = [eE] [+\-]? digit+;
+	float_suffix = [flFL];
+
+	c_comment := 
+		any* :>> '*/'
+		@{ fgoto main; };
+
+	main := |*
+
+	# Single and double literals.
+	( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) 
+		{token( TK_Slit );};
+	( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) 
+		{token( TK_Dlit );};
+
+	# Identifiers
+	( [a-zA-Z_] [a-zA-Z0-9_]* ) 
+		{token( TK_Id );};
+
+	# Floating literals.
+	( fract_const exponent? float_suffix? | digit+ exponent float_suffix? ) 
+		{token( TK_Float );};
+	
+	# Integer decimal. Leading part buffered by float.
+	( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) 
+		{token( TK_IntegerDecimal );};
+
+	# Integer octal. Leading part buffered by float.
+	( '0' [0-9]+ [ulUL]{0,2} ) 
+		{token( TK_IntegerOctal );};
+
+	# Integer hex. Leading 0 buffered by float.
+	( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) 
+		{token( TK_IntegerHex );};
+
+	# Only buffer the second item, first buffered by symbol.
+	'::' {token( TK_NameSep );};
+	'==' {token( TK_EqualsEquals );};
+	'!=' {token( TK_NotEquals );};
+	'&&' {token( TK_AndAnd );};
+	'||' {token( TK_OrOr );};
+	'*=' {token( TK_MultAssign );};
+	'/=' {token( TK_DivAssign );};
+	'%=' {token( TK_PercentAssign );};
+	'+=' {token( TK_PlusAssign );};
+	'-=' {token( TK_MinusAssign );};
+	'&=' {token( TK_AmpAssign );};
+	'^=' {token( TK_CaretAssign );};
+	'|=' {token( TK_BarAssign );};
+	'++' {token( TK_PlusPlus );};
+	'--' {token( TK_MinusMinus );};
+	'->' {token( TK_Arrow );};
+	'->*' {token( TK_ArrowStar );};
+	'.*' {token( TK_DotStar );};
+
+	# Three char compounds, first item already buffered.
+	'...' {token( TK_DotDotDot );};
+
+	# Single char symbols.
+	( punct - [_"'] ) {token( ts[0] );};
+
+	# Comments and whitespace.
+	'/*' { fgoto c_comment; };
+	'//' [^\n]* '\n';
+	( any - 33..126 )+;
+
+	*|;
+}%%
+
+void token( int tok )
+{
+	char *data = ts;
+	int len = te - ts;
+
+	cout << '<' << tok << "> ";
+	cout.write( data, len );
+	cout << '\n';
+	
+	/* Count newlines and columns. This code is here mainly for having some
+	 * code in the token routine when commenting out the above output during
+	 * performance testing. */
+	for ( int i = 0; i < len; i ++ ) {
+		if ( data[i] == '\n' ) {
+			line += 1;
+			col = 1;
+		}
+		else {
+			col += 1;
+		}
+	}
+}
+
+int main()
+{
+	std::ios::sync_with_stdio(false);
+
+	%% write init;
+
+	/* Do the first read. */
+	bool done = false;
+	while ( !done ) {
+		char *p = buf + have;
+		int space = BUFSIZE - have;
+
+		if ( space == 0 ) {
+			/* We filled up the buffer trying to scan a token. */
+			cerr << "OUT OF BUFFER SPACE" << endl;
+			exit(1);
+		}
+
+		cin.read( p, space );
+		int len = cin.gcount();
+		char *pe = p + len;
+		char *eof = 0;
+
+		/* If we see eof then set the eof pointer and make this the last pass. */
+	 	if ( cin.eof() ) {
+			eof = pe;
+			done = true;
+		}
+
+		%% write exec;
+
+		/* Check if we failed. */
+		if ( cs == Scanner_error ) {
+			/* Machine failed before finding a token. */
+			cerr << "PARSE ERROR" << endl;
+			exit(1);
+		}
+
+		/* Now set up the prefix. */
+		if ( ts == 0 )
+			have = 0;
+		else {
+			/* There is data that needs to be shifted over. */
+			have = pe - ts;
+			memmove( buf, ts, have );
+			te -= (ts-buf);
+			ts = buf;
+		}
+	}
+
+	return 0;
+}
diff --git a/examples/format.rl b/examples/format.rl
new file mode 100644
index 00000000..f8a37beb
--- /dev/null
+++ b/examples/format.rl
@@ -0,0 +1,191 @@
+/*
+ * Partial printf implementation.
+ */
+
+#define BUFLEN 1024
+#include <stdio.h>
+
+typedef void (*WriteFunc)( char *data, int len );
+
+struct format
+{
+	char buf[BUFLEN+1];
+	int buflen;
+	WriteFunc write;
+
+	int flags;
+	int width;
+	int prec;
+	int cs;
+};
+
+void do_conv( struct format *fsm, char c )
+{
+	printf( "flags: %x\n", fsm->flags );
+	printf( "width: %i\n", fsm->width );
+	printf( "prec: %i\n", fsm->prec );
+	printf( "conv: %c\n", c );
+	printf( "\n" );
+}
+
+#define FL_HASH          0x01
+#define FL_ZERO          0x02
+#define FL_DASH          0x04
+#define FL_SPACE         0x08
+#define FL_PLUS          0x10
+
+#define FL_HAS_WIDTH   0x0100
+#define FL_WIDTH_ARG   0x0200
+#define FL_HAS_PREC    0x0400
+#define FL_PREC_ARG    0x0800
+
+#define FL_LEN_H     0x010000
+#define FL_LEN_HH    0x020000
+#define FL_LEN_L     0x040000
+#define FL_LEN_LL    0x080000
+
+%%{
+	machine format;
+	access fsm->;
+
+	action clear {
+		fsm->flags = 0;
+		fsm->width = 0;
+		fsm->prec = 0;
+	}
+
+	# A non-zero number.
+	nznum = [1-9] [0-9]*;
+
+	# Width
+	action width_num { fsm->width = 10 * fsm->width + (fc-'0'); }
+	action width_arg { fsm->flags |= FL_WIDTH_ARG; }
+	action width { fsm->flags |= FL_HAS_WIDTH; }
+	width = ( ( nznum $width_num | '*' @width_arg ) %width )?;
+
+	# Precision
+	action prec_num { fsm->prec = 10 * fsm->prec + (fc-'0'); }
+	action prec_arg { fsm->flags |= FL_PREC_ARG; }
+	action prec { fsm->flags |= FL_HAS_PREC; }
+	precision = ( '.' ( digit* $prec_num %prec | '*' @prec_arg ) )?;
+
+	# Flags
+	action flags_hash { fsm->flags |= FL_HASH; }
+	action flags_zero { fsm->flags |= FL_ZERO; }
+	action flags_dash { fsm->flags |= FL_DASH; }
+	action flags_space { fsm->flags |= FL_SPACE; }
+	action flags_plus { fsm->flags |= FL_PLUS; }
+
+	flags = ( 
+		'#' @flags_hash |
+		'0' @flags_zero |
+		'-' @flags_dash |
+		' ' @flags_space |
+		'+' @flags_plus )*;
+
+	action length_h  { fsm->flags |= FL_LEN_H; }
+	action length_l  { fsm->flags |= FL_LEN_L; }
+	action length_hh { fsm->flags |= FL_LEN_HH; }
+	action length_ll { fsm->flags |= FL_LEN_LL; }
+
+	# Must use leaving transitions on 'h' and 'l' because they are
+	# prefixes for 'hh' and 'll'.
+	length = (
+		'h' %length_h | 
+		'l' %length_l |
+		'hh' @length_hh |
+		'll' @length_ll )?;
+	
+	action conversion { 
+		do_conv( fsm, fc );
+	}
+
+	conversion = [diouxXcsp] @conversion;
+
+	fmt_spec = 
+			'%' @clear 
+			flags
+			width
+			precision
+			length
+			conversion;
+	
+	action emit {
+		if ( fsm->buflen == BUFLEN ) {
+			fsm->write( fsm->buf, fsm->buflen );
+			fsm->buflen = 0;
+		}
+		fsm->buf[fsm->buflen++] = fc;
+	}
+
+	action finish_ok {
+		if ( fsm->buflen > 0 )
+			fsm->write( fsm->buf, fsm->buflen );
+	}
+	action finish_err {
+		printf("EOF IN FORMAT\n");
+	}
+	action err_char {
+		printf("ERROR ON CHAR: 0x%x\n", fc );
+	}
+
+	main := ( 
+			[^%] @emit | 
+			'%%' @emit | 
+			fmt_spec 
+		)* @/finish_err %/finish_ok $!err_char;
+}%%
+
+%% write data;
+
+void format_init( struct format *fsm )
+{
+	fsm->buflen = 0;
+	%% write init;
+}
+
+void format_execute( struct format *fsm, const char *data, int len, int isEof )
+{
+	const char *p = data;
+	const char *pe = data + len;
+	const char *eof = isEof ? pe : 0;
+
+	%% write exec;
+}
+
+int format_finish( struct format *fsm )
+{
+	if ( fsm->cs == format_error )
+		return -1;
+	if ( fsm->cs >= format_first_final )
+		return 1;
+	return 0;
+}
+
+
+#define INPUT_BUFSIZE 2048
+
+struct format fsm;
+char buf[INPUT_BUFSIZE];
+
+void write(char *data, int len )
+{
+	fwrite( data, 1, len, stdout );
+}
+
+int main()
+{
+	fsm.write = write;
+	format_init( &fsm );
+	while ( 1 ) {
+		int len = fread( buf, 1, INPUT_BUFSIZE, stdin );
+		int eof = len != INPUT_BUFSIZE;
+		format_execute( &fsm, buf, len, eof );
+		if ( eof )
+			break;
+	}
+	if ( format_finish( &fsm ) <= 0 )
+		printf("FAIL\n");
+	return 0;
+}
+
diff --git a/examples/go/.gitignore b/examples/go/.gitignore
new file mode 100644
index 00000000..f8b421d6
--- /dev/null
+++ b/examples/go/.gitignore
@@ -0,0 +1,5 @@
+/*.dot
+/*.go
+/atoi
+/rpn
+/url
diff --git a/examples/go/Makefile b/examples/go/Makefile
new file mode 100644
index 00000000..536afcc7
--- /dev/null
+++ b/examples/go/Makefile
@@ -0,0 +1,32 @@
+# Build, run and graph the Ragel examples for Go.
+ragel = ragel
+
+# These targets name actions, not files: declare them phony so they still run
+# when a file called check, graph or clean exists in this directory.
+.PHONY: check graph clean
+
+# Default target: build every example and run it.
+check: atoi rpn url
+	./atoi
+	./rpn
+	./url
+	@echo PASS
+
+# Render each state machine with Graphviz and open it in xdot.
+graph: atoi.dot rpn.dot url.dot url_authority.dot
+	xdot atoi.dot
+	xdot rpn.dot
+	xdot url.dot
+	xdot url_authority.dot
+
+atoi: atoi.go
+atoi.go: atoi.rl
+atoi.dot: atoi.rl
+
+rpn: rpn.go
+rpn.go: rpn.rl
+rpn.dot: rpn.rl
+
+url: url.go url_authority.go
+url.go: url.rl
+url.dot: url.rl
+url_authority.go: url_authority.rl
+url_authority.dot: url_authority.rl
+
+clean:       ; rm -f *.go *.dot atoi rpn url
+%: %.go      ; go build -o $@ $^
+%.go: %.rl   ; $(ragel) -Z -T0 -o $@ $<
+%.dot: %.rl  ; $(ragel) -V -Z -p -o $@ $<
diff --git a/examples/go/README b/examples/go/README
new file mode 100644
index 00000000..bdb924b8
--- /dev/null
+++ b/examples/go/README
@@ -0,0 +1,36 @@
+.. -*-rst-*-
+
+Ragel Examples for Go
+=====================
+
+These examples serve the following purposes:
+
+- Help you learn Ragel
+- Test the correctness of the code I wrote for Ragel
+- Benchmark Ragel's performance on your machine
+- And hopefully give you some code you can steal ;]
+
+To get started you should first ``make install`` ragel.  Then navigate
+to this directory and run::
+
+    make
+
+This automatically compiles, tests and benchmarks these examples.
+
+The following examples are provided:
+
+- atoi.rl: Convert string to integer (very simple)
+- rpn.rl: Reverse Polish notation calculator (simple)
+- url.rl: Very fast and robust HTTP/SIP URL parser (very complicated)
+
+To see graphviz diagrams of the state machines generated by Ragel in
+these examples, run the following commands::
+
+    sudo apt-get install xdot
+    make graph
+
+Those diagrams (along with the pdf manual) are super important for
+troubleshooting and simplifying your Ragel code.
+
+I truly hope these examples help you in your personal and professional
+endeavors.  If you have any questions my email is: jtunney@gmail.com
diff --git a/examples/go/atoi.rl b/examples/go/atoi.rl
new file mode 100644
index 00000000..97c5163e
--- /dev/null
+++ b/examples/go/atoi.rl
@@ -0,0 +1,89 @@
+// -*-go-*-
+//
+// Convert a string to an integer.
+//
+// To compile:
+//
+//   ragel -Z -T0 -o atoi.go atoi.rl
+//   go build -o atoi atoi.go
+//   ./atoi
+//
+// To show a diagram of your state machine:
+//
+//   ragel -V -Z -p -o atoi.dot atoi.rl
+//   xdot atoi.dot
+//
+
+package main
+
+import (
+	"os"
+	"fmt"
+)
+
+%%{
+	machine atoi;
+	write data;
+}%%
+
+// atoi converts a decimal string, with an optional sign and an optional
+// trailing newline, to an int. If the machine does not end in a final state a
+// diagnostic marking the stop position is printed to stdout and the value
+// accumulated so far is still returned.
+func atoi(data string) (val int) {
+	cs, p, pe := 0, 0, len(data)
+	neg := false
+
+	%%{
+		action see_neg   { neg = true }
+		action add_digit { val = val * 10 + (int(fc) - '0') }
+
+		main :=
+			( '-'@see_neg | '+' )? ( digit @add_digit )+
+			'\n'?
+			;
+
+		write init;
+		write exec;
+	}%%
+
+	if neg {
+		val = -1 * val;
+	}
+
+	// A state below the first final state means the input was not accepted.
+	if cs < atoi_first_final {
+		fmt.Println("atoi: there was an error:", cs, "<", atoi_first_final)
+		fmt.Println(data)
+		// Indent the caret so it sits under the character at position p.
+		for i := 0; i < p; i++ {
+			fmt.Print(" ")
+		}
+		fmt.Println("^")
+	}
+
+	return val
+}
+
+//////////////////////////////////////////////////////////////////////
+
+type atoiTest struct {
+	s string
+	v int
+}
+
+var atoiTests = []atoiTest{
+	atoiTest{"7", 7},
+	atoiTest{"666", 666},
+	atoiTest{"-666", -666},
+	atoiTest{"+666", 666},
+	atoiTest{"1234567890", 1234567890},
+	atoiTest{"+1234567890\n", 1234567890},
+	// atoiTest{"+ 1234567890", 1234567890}, // i will fail
+}
+
+// main runs every case in atoiTests and exits with status 1 if any of them
+// produced the wrong value, 0 otherwise.
+func main() {
+	rc := 0
+	for _, test := range atoiTests {
+		// The parsed value needs its own name: declaring it with the same
+		// name as the exit status shadows it, and failures never reach os.Exit.
+		got := atoi(test.s)
+		if got != test.v {
+			fmt.Fprintf(os.Stderr, "FAIL atoi(%#v) != %#v\n", test.s, test.v)
+			rc = 1
+		}
+	}
+	os.Exit(rc)
+}
diff --git a/examples/go/rpn.rl b/examples/go/rpn.rl
new file mode 100644
index 00000000..2ad0a2db
--- /dev/null
+++ b/examples/go/rpn.rl
@@ -0,0 +1,159 @@
+// -*-go-*-
+//
+// Reverse Polish Notation Calculator
+// Copyright (c) 2010 J.A. Roberts Tunney
+// MIT License
+//
+// To compile:
+//
+//   ragel -Z -T0 -o rpn.go rpn.rl
+//   go build -o rpn rpn.go
+//   ./rpn
+//
+// To show a diagram of your state machine:
+//
+//   ragel -V -Z -p -o rpn.dot rpn.rl
+//   dot -Tpng -o rpn.png rpn.dot
+//
+
+package main
+
+import (
+  "errors"
+	"fmt"
+  "os"
+	"strconv"
+)
+
+// stack is a LIFO of ints. items is the backing storage and count is the
+// number of values currently held.
+type stack struct {
+	items []int
+	count int
+}
+
+// pop removes and returns the most recently pushed value. Popping an empty
+// stack panics with an index-out-of-range error.
+func (s *stack) pop() int {
+	s.count--
+	v := s.items[s.count]
+	return v
+}
+
+// push places v on top of the stack. When the backing slice is full it is
+// grown with append instead of being indexed past its end.
+func (s *stack) push(v int) {
+	if s.count == len(s.items) {
+		s.items = append(s.items, v)
+	} else {
+		s.items[s.count] = v
+	}
+	s.count++
+}
+
+// abs returns v with its sign dropped.
+func abs(v int) int {
+	if v >= 0 {
+		return v
+	}
+	return -v
+}
+
+%% machine rpn;
+%% write data;
+
+// rpn evaluates a reverse Polish notation expression whose tokens are each
+// followed by whitespace, and returns the value left on top of the stack.
+// Input the machine rejects, an operator with too few operands, division by
+// zero and a number strconv.Atoi cannot convert are all returned as errors.
+func rpn(data string) (res int, err error) {
+	// p, pe, eof := 0, len(data), len(data)
+	cs, p, pe := 0, 0, len(data)
+	mark := 0
+	st := &stack{items: make([]int, 128), count: 0}
+
+	%%{
+		# Every operator checks the operand count before popping, so bad
+		# input produces an error return instead of a runtime panic.
+		action mark { mark = p }
+		action push { x, aerr := strconv.Atoi(data[mark:p])
+		              if aerr != nil { return 0, aerr }
+		              st.push(x) }
+		action add  { if st.count < 2 { return 0, errors.New("rpn stack underflow") }
+		              y, x := st.pop(), st.pop(); st.push(x + y) }
+		action sub  { if st.count < 2 { return 0, errors.New("rpn stack underflow") }
+		              y, x := st.pop(), st.pop(); st.push(x - y) }
+		action mul  { if st.count < 2 { return 0, errors.New("rpn stack underflow") }
+		              y, x := st.pop(), st.pop(); st.push(x * y) }
+		action div  { if st.count < 2 { return 0, errors.New("rpn stack underflow") }
+		              y, x := st.pop(), st.pop()
+		              if y == 0 { return 0, errors.New("rpn division by zero") }
+		              st.push(x / y) }
+		action abs  { if st.count < 1 { return 0, errors.New("rpn stack underflow") }
+		              st.push(abs(st.pop())) }
+		action abba { st.push(666) }
+
+		stuff  = digit+ >mark %push
+		       | '+' @add
+		       | '-' @sub
+		       | '*' @mul
+		       | '/' @div
+		       | 'abs' %abs
+		       | 'add' %add
+		       | 'abba' %abba
+		       ;
+
+		main := ( space | stuff space )* ;
+
+		write init;
+		write exec;
+	}%%
+
+	// A state below the first final state means the input was not accepted:
+	// either it ran out mid-token or the machine stopped on a bad character.
+	if cs < rpn_first_final {
+		if p == pe {
+			return 0, errors.New("unexpected eof")
+		} else {
+			return 0, fmt.Errorf("error at position %d", p)
+		}
+	}
+
+	if st.count == 0 {
+		return 0, errors.New("rpn stack empty on result")
+	}
+
+	return st.pop(), nil
+}
+
+//////////////////////////////////////////////////////////////////////
+
+type rpnTest struct {
+	s string
+	v int
+}
+
+var rpnTests = []rpnTest{
+	rpnTest{"666\n", 666},
+	rpnTest{"666 111\n", 111},
+	rpnTest{"4 3 add\n", 7},
+	rpnTest{"4 3 +\n", 7},
+	rpnTest{"4 3 -\n", 1},
+	rpnTest{"4 3 *\n", 12},
+	rpnTest{"6 2 /\n", 3},
+	rpnTest{"0 3 -\n", -3},
+	rpnTest{"0 3 - abs\n", 3},
+	rpnTest{" 2  2 + 3 - \n", 1},
+	rpnTest{"10 7 3 2 * - +\n", 11},
+	rpnTest{"abba abba add\n", 1332},
+}
+
+type rpnFailTest struct {
+	s string
+	e string
+}
+
+var rpnFailTests = []rpnFailTest{
+	rpnFailTest{"\n", "rpn stack empty on result"},
+}
+
+// main runs the passing table (rpnTests) and the failing table (rpnFailTests)
+// and exits with status 1 if any case in either table misbehaves.
+func main() {
+	rc := 0
+
+	for _, test := range rpnTests {
+		res, err := rpn(test.s)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "FAIL rpn(%#v) %s\n", test.s, err)
+			rc = 1
+		} else if res != test.v {
+			fmt.Fprintf(os.Stderr, "FAIL rpn(%#v) -> %#v != %#v\n",
+				test.s, res, test.v)
+			rc = 1
+		}
+	}
+
+	// A case here fails when rpn succeeds or reports a different error; both
+	// outcomes must be reflected in the exit status as well as on stderr.
+	for _, test := range rpnFailTests {
+		res, err := rpn(test.s)
+		if err == nil {
+			fmt.Fprintf(os.Stderr, "FAIL rpn(%#v) -> %#v should fail: %#v\n",
+				test.s, res, test.e)
+			rc = 1
+		} else if err.Error() != test.e {
+			fmt.Fprintf(os.Stderr, "FAIL rpn(%#v) %#v should be %#v\n",
+				test.s, err.Error(), test.e)
+			rc = 1
+		}
+	}
+
+	os.Exit(rc)
+}
diff --git a/examples/go/url.rl b/examples/go/url.rl
new file mode 100644
index 00000000..e94d59c6
--- /dev/null
+++ b/examples/go/url.rl
@@ -0,0 +1,414 @@
+// -*-go-*-
+//
+// URL Parser
+// Copyright (c) 2010 J.A. Roberts Tunney
+// MIT License
+//
+// To compile:
+//
+//   ragel -Z -T0 -o url.go url.rl
+//   ragel -Z -T0 -o url_authority.go url_authority.rl
+//   go build -o url url.go url_authority.go
+//   ./url
+//
+// To show a diagram of your state machine:
+//
+//   ragel -V -Z -p -o url.dot url.rl
+//   xdot url.dot
+//
+//   ragel -V -Z -p -o url_authority.dot url_authority.rl
+//   xdot url_authority.dot
+//
+// Reference:
+//
+// - http://tools.ietf.org/html/rfc3986
+//
+
+package main
+
+import (
+  "errors"
+	"fmt"
+	"os"
+	"time"
+)
+
+type URL struct {
+	Scheme    string // http, sip, file, etc. (never blank, always lowercase)
+	User      string // who is you yo
+	Pass      string // for like, logging in
+	Host      string // IP 4/6 address or hostname (mandatory)
+	Port      int    // like 80 or 5060 (default 0)
+	Params    string // stuff after ';' (NOT UNESCAPED, used in sip)
+	Path      string // stuff starting with '/'
+	Query     string // stuff after '?' (NOT UNESCAPED)
+	Fragment  string // stuff after '#'
+}
+
+%% machine url;
+%% write data;
+
+// i parse absolute urls and don't suck at it.  i'll parse just about
+// any type of url you can think of and give you a human-friendly data
+// structure.
+//
+// this routine takes no more than a few microseconds, is reentrant,
+// performs in a predictable manner (for security/soft-realtime,)
+// doesn't modify your `data` buffer, and under no circumstances will
+// it panic (i hope!)
+func URLParse(data []byte) (url *URL, err error) {
+	cs, p, pe, eof := 0, 0, len(data), len(data)
+	mark := 0
+	url = new(URL)
+
+	// this buffer is so we can unescape while we roll
+	var hex byte
+	buf := make([]byte, len(data))
+	amt := 0
+
+	%%{
+		action mark      { mark = p                               }
+		action str_start { amt = 0                                }
+		action str_char  { buf[amt] = fc; amt++                   }
+		action str_lower { buf[amt] = fc + 0x20; amt++            }
+		action hex_hi    { hex = unhex(fc) * 16                   }
+		action hex_lo    { hex += unhex(fc)
+		                   buf[amt] = hex; amt++                  }
+		action scheme    { url.Scheme = string(buf[0:amt])        }
+		action authority { err = url.parseAuthority(data[mark:p])
+		                   if err != nil { return nil, err }      }
+		action path      { url.Path = string(buf[0:amt])          }
+		action query     { url.Query = string(data[mark:p])       }
+		action fragment  { url.Fragment = string(buf[0:amt])      }
+
+		# # do this instead if you *actually* use URNs (lol)
+		# action authority { url.Authority = string(data[mark:p]) }
+
+		# define what a single character is allowed to be
+		toxic     = ( cntrl | 127 ) ;
+		scary     = ( toxic | " " | "\"" | "#" | "%" | "<" | ">" ) ;
+		schmchars = ( lower | digit | "+" | "-" | "." ) ;
+		authchars = any -- ( scary | "/" | "?" | "#" ) ;
+		pathchars = any -- ( scary | "?" | "#" ) ;
+		querchars = any -- ( scary | "#" ) ;
+		fragchars = any -- ( scary ) ;
+
+		# define how characters trigger actions
+		escape    = "%" xdigit xdigit ;
+		unescape  = "%" ( xdigit @hex_hi ) ( xdigit @hex_lo ) ;
+		schmfirst = ( upper @str_lower ) | ( lower @str_char ) ;
+		schmchar  = ( upper @str_lower ) | ( schmchars @str_char ) ;
+		authchar  = escape | authchars ;
+		pathchar  = unescape | ( pathchars @str_char ) ;
+		querchar  = escape | querchars ;
+		fragchar  = unescape | ( fragchars @str_char ) ;
+
+		# define multi-character patterns
+		scheme    = ( schmfirst schmchar* ) >str_start %scheme ;
+		authority = authchar+ >mark %authority ;
+		path      = ( ( "/" @str_char ) pathchar* ) >str_start %path ;
+		query     = "?" ( querchar* >mark %query ) ;
+		fragment  = "#" ( fragchar* >str_start %fragment ) ;
+		url       = scheme ":" "//"? authority path? query? fragment?
+			      | scheme ":" "//" authority? path? query? fragment?
+			      ;
+
+		main := url;
+		write init;
+		write exec;
+	}%%
+
+	if cs < url_first_final {
+		if p == pe {
+			return nil, errors.New(
+				fmt.Sprintf("unexpected eof: %s", data))
+		} else {
+			return nil, errors.New(
+				fmt.Sprintf("error in url at pos %d: %s", p, data))
+		}
+	}
+
+	return url, nil
+}
+
+// unhex returns the numeric value of the hexadecimal digit b, accepting both
+// letter cases. Any byte that is not a hex digit yields 0.
+func unhex(b byte) byte {
+	if b >= '0' && b <= '9' {
+		return b - '0'
+	}
+	if b >= 'a' && b <= 'f' {
+		return 10 + (b - 'a')
+	}
+	if b >= 'A' && b <= 'F' {
+		return 10 + (b - 'A')
+	}
+	return 0
+}
+
+//////////////////////////////////////////////////////////////////////
+
+type urlTest struct {
+	s []byte
+	url URL
+}
+
+var urlTests = []urlTest{
+
+	urlTest{
+		[]byte("http://user:pass@example.com:80;hello/lol.php?fun#omg"),
+		URL{
+			Scheme: "http",
+			User: "user",
+			Pass: "pass",
+			Host: "example.com",
+			Port: 80,
+			Params: "hello",
+			Path: "/lol.php",
+			Query: "fun",
+			Fragment: "omg",
+		},
+	},
+
+	urlTest{
+		[]byte("a:b"),
+		URL{
+			Scheme: "a",
+			Host: "b",
+		},
+	},
+
+	urlTest{
+		[]byte("GoPHeR://@example.com@:;/?#"),
+		URL{
+			Scheme: "gopher",
+			Host: "@example.com@",
+			Path: "/",
+		},
+	},
+
+	urlTest{
+		[]byte("ldap://[2001:db8::7]/c=GB?objectClass/?one"),
+		URL{
+			Scheme: "ldap",
+			Host: "2001:db8::7",
+			Path: "/c=GB",
+			Query: "objectClass/?one",
+		},
+	},
+
+	urlTest{
+		[]byte("http://user@example.com"),
+		URL{
+			Scheme: "http",
+			User: "user",
+			Host: "example.com",
+		},
+	},
+
+	urlTest{
+		[]byte("http://品研发和研发管@☃.com:65000;%20"),
+		URL{
+			Scheme: "http",
+			User: "品研发和研发管",
+			Host: "☃.com",
+			Port: 65000,
+			Params: "%20",
+		},
+	},
+
+	urlTest{
+		[]byte("https://example.com:80"),
+		URL{
+			Scheme: "https",
+			Host: "example.com",
+			Port: 80,
+		},
+	},
+
+	urlTest{
+		[]byte("file:///etc/passwd"),
+		URL{
+			Scheme: "file",
+			Path: "/etc/passwd",
+		},
+	},
+
+	urlTest{
+		[]byte("file:///c:/WINDOWS/clock.avi"),
+		URL{
+			Scheme: "file",
+			Path: "/c:/WINDOWS/clock.avi", // <-- is this kosher?
+		},
+	},
+
+	urlTest{
+		[]byte("file://hostname/path/to/the%20file.txt"),
+		URL{
+			Scheme: "file",
+			Host: "hostname",
+			Path: "/path/to/the file.txt",
+		},
+	},
+
+	urlTest{
+		[]byte("sip:example.com"),
+		URL{
+			Scheme: "sip",
+			Host: "example.com",
+		},
+	},
+
+	urlTest{
+		[]byte("sip:example.com:5060"),
+		URL{
+			Scheme: "sip",
+			Host: "example.com",
+			Port: 5060,
+		},
+	},
+
+	urlTest{
+		[]byte("mailto:ditto@pokémon.com"),
+		URL{
+			Scheme: "mailto",
+			User: "ditto",
+			Host: "pokémon.com",
+		},
+	},
+
+	urlTest{
+		[]byte("sip:[dead:beef::666]:5060"),
+		URL{
+			Scheme: "sip",
+			Host: "dead:beef::666",
+			Port: 5060,
+		},
+	},
+
+	urlTest{
+		[]byte("tel:+12126660420"),
+		URL{
+			Scheme: "tel",
+			Host: "+12126660420",
+		},
+	},
+
+	urlTest{
+		[]byte("sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg"),
+		URL{
+			Scheme: "sip",
+			User: "bob barker",
+			Pass: "priceisright",
+			Host: "dead:beef::666",
+			Port: 5060,
+			Params: "isup-oli=00",
+			Path: "/palfun.html",
+			Query: "haha",
+			Fragment: "omg",
+		},
+	},
+
+	urlTest{
+		[]byte("http://www.google.com/search?%68l=en&safe=off&q=omfg&aq=f&aqi=g2g-s1g1g-s1g5&aql=&oq=&gs_rfai="),
+		URL{
+			Scheme: "http",
+			Host: "www.google.com",
+			Path: "/search",
+			Query: "%68l=en&safe=off&q=omfg&aq=f&aqi=g2g-s1g1g-s1g5&aql=&oq=&gs_rfai=",
+		},
+	},
+
+}
+
+// compare checks each field of the parsed url against the expected value in
+// test.url and prints one FAIL line to stderr per mismatching field. It
+// returns true only when every field matches.
+func (test *urlTest) compare(url *URL) (passed bool) {
+	passed = true
+
+	// check reports a single mismatching field and records the failure.
+	check := func(field string, got, want interface{}) {
+		if got != want {
+			fmt.Fprintf(os.Stderr, "FAIL url(%#v) %s: %#v != %#v\n",
+				string(test.s), field, got, want)
+			passed = false
+		}
+	}
+
+	check("scheme", url.Scheme, test.url.Scheme)
+	check("user", url.User, test.url.User)
+	check("pass", url.Pass, test.url.Pass)
+	check("host", url.Host, test.url.Host)
+	check("port", url.Port, test.url.Port)
+	check("params", url.Params, test.url.Params)
+	check("path", url.Path, test.url.Path)
+	check("query", url.Query, test.url.Query)
+	check("fragment", url.Fragment, test.url.Fragment)
+
+	return passed
+}
+
+// bench times URLParse on a handful of sample URLs and prints, for each one,
+// the elapsed time divided by the number of rounds, in nanoseconds.
+func bench() {
+	const rounds = 10000
+	samples := [][]byte{
+		[]byte("a:a"),
+		[]byte("http://google.com/"),
+		[]byte("sip:jtunney@lobstertech.com"),
+		[]byte("http://user:pass@example.com:80;hello/lol.php?fun#omg"),
+		[]byte("file:///etc/passwd"),
+	}
+	for _, sample := range samples {
+		start := time.Now()
+		for n := rounds; n > 0; n-- {
+			URLParse(sample)
+		}
+		elapsed := time.Now().Sub(start)
+		fmt.Printf("BENCH URLParse(%s) -> %d ns\n", sample, elapsed.Nanoseconds() / rounds)
+	}
+}
+
+// test parses every entry of urlTests and returns 1 if any entry fails to
+// parse or parses to something other than the expected URL, 0 otherwise.
+func test() (rc int) {
+	for _, tc := range urlTests {
+		parsed, err := URLParse(tc.s)
+		switch {
+		case err != nil:
+			fmt.Fprintf(os.Stderr, "FAIL url(%#v) %s\n", string(tc.s), err)
+			rc = 1
+		case !tc.compare(parsed):
+			rc = 1
+		}
+	}
+	return rc
+}
+
+func main() {
+	rc := test()
+	if rc == 0 {
+		bench()
+	}
+	os.Exit(rc)
+}
diff --git a/examples/go/url_authority.rl b/examples/go/url_authority.rl
new file mode 100644
index 00000000..3e651ad0
--- /dev/null
+++ b/examples/go/url_authority.rl
@@ -0,0 +1,165 @@
+// -*-go-*-
+//
+// URL Parser
+// Copyright (c) 2010 J.A. Roberts Tunney
+// MIT License
+//
+
+package main
+
+import (
+    "errors"
+    "fmt"
+    "strconv"
+)
+
+%% machine url_authority;
+%% write data;
+
+// i parse strings like `alice@pokémon.com`.
+//
+// sounds simple right?  but i also parse stuff like:
+//
+//   bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00
+//
+// which in actual reality is:
+//
+// - User: "bob barker"
+// - Pass: "priceisright"
+// - Host: "dead:beef::666"
+// - Port: 5060
+// - Params: "isup-oli=00"
+//
+// which was probably extracted from an absolute url that looked like:
+//
+//   sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg
+//
+// which was probably extracted from its address form:
+//
+//   "Bob Barker" <sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg>;tag=666
+//
+// who would have thought this could be so hard ._.
+func (url *URL) parseAuthority(data []byte) (err error) {
+	// Fills in url.User, url.Pass, url.Host, url.Port and url.Params from
+	// data.  The EOF actions (alldone, params_eof) return nil; every other
+	// way out of the machine ends at the fail label and returns an error.
+	cs, p, pe, eof := 0, 0, len(data), len(data)
+	mark := 0
+
+	// temporary holding place for user:pass and/or host:port cuz an
+	// optional term (user[:pass]) coming before a mandatory term
+	// (host[:port]) would require backtracking and all that
+	// evil nondeterministic stuff which ragel seems to hate.  (for
+	// this same reason you're also allowed to use square brackets
+	// around the username.)
+	var b1, b2 string
+
+	// this buffer is so we can unescape while we roll
+	var hex byte
+	buf := make([]byte, len(data))
+	amt := 0
+
+	%%{
+		action mark        { mark = p                         }
+		action str_start   { amt = 0                          }
+		action str_char    { buf[amt] = fc; amt++             }
+		action hex_hi      { hex = unhex(fc) * 16             }
+		action hex_lo      { hex += unhex(fc)
+		                     buf[amt] = hex; amt++            }
+		action copy_b1     { b1 = string(buf[0:amt]); amt = 0 }
+		action copy_b2     { b2 = string(buf[0:amt]); amt = 0 }
+		action copy_host   { url.Host = string(b1); amt = 0   }
+
+		# portchar accepts escapes, so an escaped minus sign can reach Atoi;
+		# negative values are rejected along with values above 65535.
+		action copy_port {
+			if b2 != "" {
+				url.Port, err = strconv.Atoi(string(b2))
+				if err != nil { goto fail }
+				if url.Port < 0 || url.Port > 65535 { goto fail }
+			}
+		}
+
+		action params {
+			url.Params = string(data[mark:p])
+		}
+
+		action params_eof {
+			url.Params = string(data[mark:p])
+			return nil
+		}
+
+		action atsymbol {
+			url.User = string(b1)
+			url.Pass = string(b2)
+			b2 = ""
+		}
+
+		# runs at end of input: whatever was collected last is the host, or
+		# the port when a host has already been copied into b1.
+		action alldone {
+			url.Host = string(b1)
+			if url.Host == "" {
+				url.Host = string(buf[0:amt])
+			} else {
+				if amt > 0 {
+					b2 = string(buf[0:amt])
+				}
+				if b2 != "" {
+					url.Port, err = strconv.Atoi(string(b2))
+					if err != nil { goto fail }
+					if url.Port < 0 || url.Port > 65535 { goto fail }
+				}
+			}
+			return nil
+		}
+
+		# define what a single character is allowed to be
+		toxic         = ( cntrl | 127 ) ;
+		scary         = ( toxic | space | "\"" | "#" | "%" | "<" | ">" ) ;
+		authdelims    = ( "/" | "?" | "#" | ":" | "@" | ";" | "[" | "]" ) ;
+		userchars     = any -- ( authdelims | scary ) ;
+		userchars_esc = userchars | ":" ;
+		passchars     = userchars ;
+		hostchars     = passchars | "@" ;
+		hostchars_esc = hostchars | ":" ;
+		portchars     = digit ;
+		paramchars    = hostchars | ":" | ";" ;
+
+		# define how characters trigger actions
+		escape        = "%" xdigit xdigit ;
+		unescape      = "%" ( xdigit @hex_hi ) ( xdigit @hex_lo ) ;
+		userchar      = unescape | ( userchars @str_char ) ;
+		userchar_esc  = unescape | ( userchars_esc @str_char ) ;
+		passchar      = unescape | ( passchars @str_char ) ;
+		hostchar      = unescape | ( hostchars @str_char ) ;
+		hostchar_esc  = unescape | ( hostchars_esc @str_char ) ;
+		portchar      = unescape | ( portchars @str_char ) ;
+		paramchar     = escape | paramchars ;
+
+		# define multi-character patterns
+		user_plain    = userchar+ >str_start %copy_b1 ;
+		user_quoted   = "[" ( userchar_esc+ >str_start %copy_b1 ) "]" ;
+		user          = ( user_quoted | user_plain ) %/alldone ;
+		pass          = passchar+ >str_start %copy_b2 %/alldone ;
+		host_plain    = hostchar+ >str_start %copy_b1 %copy_host ;
+		host_quoted   = "[" ( hostchar_esc+ >str_start %copy_b1 %copy_host ) "]" ;
+		host          = ( host_quoted | host_plain ) %/alldone ;
+		port          = portchar* >str_start %copy_b2 %copy_port %/alldone ;
+		params        = ";" ( paramchar* >mark %params %/params_eof ) ;
+		userpass      = user ( ":" pass )? ;
+		hostport      = host ( ":" port )? ;
+		authority     = ( userpass ( "@" @atsymbol ) )? hostport params? ;
+
+		main := authority;
+		write init;
+		write exec;
+	}%%
+
+	// if cs >= url_authority_first_final {
+	// 	return nil
+	// }
+
+fail:
+	// fmt.Println("error state", cs)
+	// fmt.Println(string(data))
+	// for i := 0; i < p; i++ {
+	// 	fmt.Print(" ")
+	// }
+	// fmt.Println("^")
+	// fmt.Println(url)
+	return errors.New(fmt.Sprintf("bad url authority: %#v", string(data)))
+}
diff --git a/examples/gotocallret.rl b/examples/gotocallret.rl
new file mode 100644
index 00000000..32c01a2c
--- /dev/null
+++ b/examples/gotocallret.rl
@@ -0,0 +1,96 @@
+/*
+ * Demonstrate the use of goto, call and return. This machine expects either a
+ * lower case char or a digit as a command then a space followed by the command
+ * arg. If the command is a char, then the arg must be a string of chars.
+ * If the command is a digit, then the arg must be a string of digits. This
+ * choice is determined by action code, rather than through transition
+ * destinations.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+struct GotoCallRet 
+{
+	char comm;
+	int cs, top, stack[32];
+
+	int init( );
+	int execute( const char *data, int len, bool isEof );
+	int finish( );
+};
+
+%%{
+	machine GotoCallRet;
+
+	# Error machine, consumes to end of 
+	# line, then starts the main line over.
+	garble_line := (
+		(any-'\n')*'\n'
+	) >{cout << "error: garbling line" << endl;} @{fgoto main;};
+
+	# Look for a string of alphas or of digits, 
+	# on anything else, hold the character and return.
+	alp_comm := alpha+ $!{fhold;fret;};
+	dig_comm := digit+ $!{fhold;fret;};
+
+	# Choose which machine to call into based on the command character
+	# saved in comm: 'a' and above selects alp_comm, anything lower dig_comm.
+	action comm_arg {
+		if ( comm >= 'a' )
+			fcall alp_comm;
+		else 
+			fcall dig_comm;
+	}
+
+	# Specifies command string. Note that the arg is left out.
+	command = (
+		[a-z0-9] @{comm = fc;} ' ' @comm_arg '\n'
+	) @{cout << "correct command" << endl;};
+
+	# Any number of commands. If there is an 
+	# error anywhere, garble the line.
+	main := command* $!{fhold;fgoto garble_line;};
+}%%
+
+%% write data;
+
+int GotoCallRet::init( )
+{
+	%% write init;
+	return 1;
+}
+
+/*
+ * Run the machine over one chunk of input and classify the resulting state:
+ * -1 for the error state, 1 for a final state, 0 otherwise.
+ */
+int GotoCallRet::execute( const char *data, int len, bool isEof )
+{
+	const char *p = data, *pe = data + len;
+	const char *eof = 0;
+	if ( isEof )
+		eof = pe;
+
+	%% write exec;
+	if ( cs == GotoCallRet_error )
+		return -1;
+	return ( cs >= GotoCallRet_first_final ) ? 1 : 0;
+}
+
+#define BUFSIZE 1024
+
+/*
+ * Feed stdin to the machine one fgets() chunk at a time, then report an
+ * error on stderr if the machine did not finish in a final state.
+ */
+int main()
+{
+	char line[BUFSIZE];
+	GotoCallRet machine;
+
+	machine.init();
+	for ( ;; ) {
+		if ( fgets( line, sizeof(line), stdin ) == 0 )
+			break;
+		machine.execute( line, strlen(line), false );
+	}
+
+	/* Signal end of input with an empty final chunk. */
+	machine.execute( 0, 0, true );
+	if ( machine.cs < GotoCallRet_first_final )
+		cerr << "gotocallret: error: parsing input" << endl;
+	return 0;
+}
diff --git a/examples/mailbox.rl b/examples/mailbox.rl
new file mode 100644
index 00000000..94590fdd
--- /dev/null
+++ b/examples/mailbox.rl
@@ -0,0 +1,207 @@
+/*
+ * Parses unix mail boxes into headers and bodies.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+#define BUFSIZE 2048
+
+/* A growable buffer for collecting headers. */
+struct Buffer
+{
+	/* Starts with no storage; the destructor releases whatever was allocated. */
+	Buffer() : data(0), allocated(0), length(0) { }
+	~Buffer() { empty(); }
+
+	/* Append one character, requesting twice the new length from
+	 * upAllocate() when the current allocation is too small. */
+	void append( char p ) {
+		if ( ++length > allocated )
+			upAllocate( length*2 );
+		data[length-1] = p;
+	}
+		
+	/* Forget the contents but keep the allocation. */
+	void clear() { length = 0; }
+	void upAllocate( int len );
+	void empty();
+
+	char *data;		/* storage, or 0 when nothing is allocated */
+	int allocated;	/* size of the storage in bytes */
+	int length;		/* number of bytes in use */
+};
+
+
+struct MailboxScanner
+{
+	Buffer headName;
+	Buffer headContent;
+
+	int cs, top, stack[1];
+
+	int init( );
+	int execute( const char *data, int len, bool isEof );
+	int finish( );
+};
+
+%%{
+	machine MailboxScanner;
+
+	# Buffer the header names.
+	action bufHeadName { headName.append(fc); }
+
+	# Prints a blank line after the end of the headers of each message.
+	action blankLine { cout << endl; }
+	
+	# Helpers we will use in matching the date section of the from line.
+	day = /[A-Z][a-z][a-z]/;
+	month = /[A-Z][a-z][a-z]/;
+	year = /[0-9][0-9][0-9][0-9]/;
+	time = /[0-9][0-9]:[0-9][0-9]/ . ( /:[0-9][0-9]/ | '' );
+	letterZone = /[A-Z][A-Z][A-Z]/;
+	numZone = /[+\-][0-9][0-9][0-9][0-9]/;
+	zone = letterZone | numZone;
+	dayNum = /[0-9 ][0-9]/;
+
+	# These are the different formats of the date minus an obscure
+	# type that has a funny string 'remote from xxx' on the end. Taken
+	# from c-client in the imap-2000 distribution.
+	date = day . ' ' . month . ' ' . dayNum . ' ' . time . ' ' .
+		( year | year . ' ' . zone | zone . ' ' . year );
+
+	# From lines separate messages. We will exclude fromLine from a message
+	# body line.  This will cause us to stay in message line up until an
+	# entirely correct from line is matched.
+	fromLine = 'From ' . (any-'\n')* . ' ' . date . '\n';
+
+	# The types of characters that can be used as a header name.
+	hchar = print - [ :];
+
+	# Simply eat up an uninteresting header. Return at the first non-ws
+	# character following a newline.
+	consumeHeader := ( 
+			[^\n] | 
+			'\n' [ \t] |
+			'\n' [^ \t] @{fhold; fret;}
+		)*;
+
+	action hchar {headContent.append(fc);}
+	action hspace {headContent.append(' ');}
+
+	action hfinish {
+		headContent.append(0);
+		cout << headContent.data << endl;
+		headContent.clear();
+		fhold;
+		fret;
+	}
+
+	# Display the contents of a header as it is consumed. Collapses line
+	# continuations to a single space. 
+	printHeader := ( 
+		[^\n] @hchar  | 
+		( '\n' ( [ \t]+ '\n' )* [ \t]+ ) %hspace
+	)** $!hfinish;
+
+	action onHeader 
+	{
+		headName.append(0);
+		if ( strcmp( headName.data, "From" ) == 0 ||
+				strcmp( headName.data, "To" ) == 0 ||
+				strcmp( headName.data, "Subject" ) == 0 )
+		{
+			/* Print the header name, then jump to a machine the will display
+			 * the contents. */
+			cout << headName.data << ":";
+			headName.clear();
+			fcall printHeader;
+		}
+
+		headName.clear();
+		fcall consumeHeader;
+	}
+
+	header = hchar+ $bufHeadName ':' @onHeader;
+
+	# Exclude fromLine from a messageLine, otherwise when encountering a
+	# fromLine we will be simultaneously matching the old message and a new
+	# message.
+	messageLine = ( [^\n]* '\n' - fromLine );
+
+	# An entire message.
+	message = ( fromLine .  header* .  '\n' @blankLine .  messageLine* );
+
+	# File is a series of messages.
+	main := message*;
+}%%
+
+%% write data;
+
+int MailboxScanner::init( )
+{
+	%% write init;
+	return 1;
+}
+
+/*
+ * Run the scanner over one chunk of input and return the resulting status:
+ * -1 for the error state, 1 for a final state, 0 otherwise.
+ */
+int MailboxScanner::execute( const char *data, int len, bool isEof )
+{
+	const char *p = data;
+	const char *pe = data + len;
+	const char *eof = isEof ? pe : 0;
+
+	%% write exec;
+
+	/* finish() performs exactly this classification of cs. */
+	return finish();
+}
+
+int MailboxScanner::finish( )
+{
+	if ( cs == MailboxScanner_error )
+		return -1;
+	if ( cs >= MailboxScanner_first_final )
+		return 1;
+	return 0;
+}
+
+
+/* Release the storage, if any, and return the buffer to its initial state. */
+void Buffer::empty()
+{
+	if ( data == 0 )
+		return;
+
+	free( data );
+	data = 0;
+	allocated = 0;
+	length = 0;
+}
+
+/*
+ * Resize the storage to len bytes. Existing contents are preserved. If the
+ * allocation fails the program reports it on stderr and exits, because
+ * append() writes into the buffer as soon as this returns.
+ */
+void Buffer::upAllocate( int len )
+{
+	/* realloc() on a null pointer behaves like malloc(), so one call covers
+	 * both the first allocation and later growth. */
+	char *grown = (char*) realloc( data, len );
+	if ( grown == 0 ) {
+		cerr << "mailbox: out of memory" << endl;
+		exit( 1 );
+	}
+	data = grown;
+	allocated = len;
+}
+
+MailboxScanner mailbox;
+char buf[BUFSIZE];
+
+/*
+ * Feed stdin to the mailbox scanner in BUFSIZE chunks; a short read is the
+ * last chunk. Report an error on stderr if the scanner does not finish in a
+ * final state.
+ */
+int main()
+{
+	int len;
+
+	mailbox.init();
+	do {
+		len = fread( buf, 1, BUFSIZE, stdin );
+		mailbox.execute( buf, len, len != BUFSIZE );
+	} while ( len == BUFSIZE );
+
+	if ( mailbox.finish() <= 0 )
+		cerr << "mailbox: error parsing input" << endl;
+	return 0;
+}
diff --git a/examples/params.rl b/examples/params.rl
new file mode 100644
index 00000000..a8ffeae9
--- /dev/null
+++ b/examples/params.rl
@@ -0,0 +1,102 @@
+/*
+ * Parse command line arguments.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#define BUFLEN 1024
+
+struct params
+{
+	char buffer[BUFLEN+1];
+	int buflen;
+	int cs;
+};
+
+%%{
+	machine params;
+	access fsm->;
+
+	# A buffer to collect arguments
+
+	# Append to the buffer. Characters beyond BUFLEN are dropped.
+	action append {
+		if ( fsm->buflen < BUFLEN )
+			fsm->buffer[fsm->buflen++] = fc;
+	}
+
+	# Terminate a buffer. The buffer holds BUFLEN+1 bytes and append stops
+	# at BUFLEN, so the terminator still fits when the argument filled the
+	# buffer completely.
+	action term {
+		if ( fsm->buflen <= BUFLEN )
+			fsm->buffer[fsm->buflen++] = 0;
+	}
+
+	# Clear out the buffer
+	action clear { fsm->buflen = 0; }
+
+	action help { printf("help\n"); }
+	action version { printf("version\n"); }
+	action output { printf("output: \"%s\"\n", fsm->buffer); }
+	action spec { printf("spec: \"%s\"\n", fsm->buffer); }
+	action mach { printf("machine: \"%s\"\n", fsm->buffer); }
+
+	# Helpers that collect strings
+	string = [^\0]+ >clear $append %term;
+
+	# Different arguments. Each one ends at the NUL that terminates the
+	# argv entry; the optional NUL after the flag lets its value come
+	# either attached to the flag or in the entry that follows.
+	help = ( '-h' | '-H' | '-?' | '--help' ) 0 @help;
+	version = ( '-v' | '--version' ) 0 @version;
+	output = '-o' 0? string 0 @output;
+	spec = '-S' 0? string 0 @spec;
+	mach = '-M' 0? string 0 @mach;
+
+	main := ( 
+		help | 
+		version | 
+		output |
+		spec |
+		mach
+	)*;
+}%%
+
+%% write data;
+
+void params_init( struct params *fsm )
+{
+	fsm->buflen = 0;
+	%% write init;
+}
+
+void params_execute( struct params *fsm, const char *data, int len )
+{
+	const char *p = data;
+	const char *pe = data + len;
+
+	%% write exec;
+}
+
+int params_finish( struct params *fsm )
+{
+	if ( fsm->cs == params_error )
+		return -1;
+	if ( fsm->cs >= params_first_final )
+		return 1;
+	return 0;
+}
+
+#define BUFSIZE 2048
+
+/*
+ * Run every command line argument through the params machine and complain
+ * on stderr if the machine does not end in a final state.
+ */
+int main( int argc, char **argv )
+{
+	struct params params;
+	int a = 1;
+
+	params_init( &params );
+	while ( a < argc ) {
+		const char *arg = argv[a++];
+		/* The length includes the terminating NUL. */
+		params_execute( &params, arg, strlen(arg)+1 );
+	}
+
+	if ( params_finish( &params ) != 1 )
+		fprintf( stderr, "params: error processing arguments\n" );
+
+	return 0;
+}
diff --git a/examples/pullscan.rl b/examples/pullscan.rl
new file mode 100644
index 00000000..d9e8a579
--- /dev/null
+++ b/examples/pullscan.rl
@@ -0,0 +1,170 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define BUFSIZE 4096
+
+typedef struct _Scanner {
+	/* Scanner state. */
+    int cs;	/* current machine state; scan() compares it with Scanner_error */
+    int act;	/* presumably used by the generated scanner code; not touched by hand here */
+    int have;	/* bytes of an unfinished token kept at the start of buf */
+    int curline;	/* set to 1 by scan_init(); not updated in the visible code */
+    char *ts;	/* start of the token being matched, or 0 when none is in progress */
+    char *te;	/* end of the token; shifted together with ts on refill */
+    char *p;	/* read cursor into buf */
+    char *pe;	/* one past the last valid byte in buf */
+    char *eof;	/* set to 0 by scan_init(); never changed in the visible code */
+	FILE *file;	/* input stream that scan() reads with fread() */
+	int done;	/* set once fread() returns fewer bytes than requested */
+
+	/* Token data */
+	char *data;	/* start of the most recent token's text (copied from ts) */
+	int len;	/* length of that text, computed as p - data */
+    int value;	/* not used in the visible code */
+
+	char buf[BUFSIZE];	/* input buffer that p, pe, ts and te point into */
+} Scanner;
+
+
+%%{
+	machine Scanner;
+	write data;
+}%%
+
+void scan_init( Scanner *s, FILE *file )
+{
+	memset( s, 0, sizeof *s );	/* zero every field, buf included */
+	s->eof = 0;
+	s->file = file;
+	s->curline = 1;
+	%% write init;
+}
+
+#define TK_NO_TOKEN (-1)
+#define TK_ERR 128
+#define TK_EOF 129
+#define TK_Identifier 130
+#define TK_Number 131
+#define TK_String 132
+
+#define ret_tok( _tok ) do { token = (_tok); s->data = s->ts; } while (0) /* record token id and text start */
+
+int scan( Scanner *s )	/* returns the next token id; for a token, s->data and s->len give its text */
+{
+	int token = TK_NO_TOKEN;
+	int space, readlen;
+
+	while ( 1 ) {
+		if ( s->p == s->pe ) {	/* everything buffered has been consumed: refill */
+			printf("scanner: need more data\n");
+
+			if ( s->ts == 0 )	/* no token in progress, nothing to keep */
+				s->have = 0;
+			else {
+				/* There is data that needs to be shifted over. */
+				printf("scanner: buffer broken mid token\n");
+				s->have = s->pe - s->ts;
+				memmove( s->buf, s->ts, s->have );
+				s->te -= (s->ts-s->buf);	/* move te by the same distance as ts */
+				s->ts = s->buf;
+			}
+
+			s->p = s->buf + s->have;	/* new input goes after the kept prefix */
+			space = BUFSIZE - s->have;
+
+			if ( space == 0 ) {
+				/* We filled up the buffer trying to scan a token. */
+				printf("scanner: out of buffer space\n");
+				return TK_ERR;
+			}
+
+			if ( s->done ) {
+				printf("scanner: end of file\n");
+				s->p[0] = 0;	/* feed one 0 byte, the input the TK_EOF rule below looks for */
+				readlen = 1;
+			}
+			else {
+				readlen = fread( s->p, 1, space, s->file );
+				if ( readlen < space )	/* short read: the next refill takes the s->done branch */
+					s->done = 1;
+			}
+
+			s->pe = s->p + readlen;
+		}
+
+		%%{
+			machine Scanner;
+			access s->;
+			variable p s->p;
+			variable pe s->pe;
+			variable eof s->eof;
+
+			main := |*
+
+			# Identifiers
+			( [a-zA-Z_] [a-zA-Z0-9_]* ) =>
+				{ ret_tok( TK_Identifier ); fbreak; };
+
+			# Whitespace: no action, so no token is produced
+			[ \t\n];
+
+			'"' ( [^\\"] | '\\' any ) * '"' =>
+				{ ret_tok( TK_String ); fbreak; };
+
+			# Number
+			digit+ => 
+				{ ret_tok( TK_Number ); fbreak; };
+			
+			# EOF: a 0 byte, such as the one the refill code appends
+			0 =>
+				{ ret_tok( TK_EOF ); fbreak; };
+
+			# Anything else: the token id is the byte at s->p
+			any => 
+				{ ret_tok( *s->p ); fbreak; };
+
+			*|;
+
+			write exec;
+		}%%
+
+		if ( s->cs == Scanner_error )
+			return TK_ERR;
+
+		if ( token != TK_NO_TOKEN ) {	/* an action ran ret_tok() */
+			s->len = s->p - s->data;
+			return token;
+		}
+	}
+}
+
+
+int main (int argc, char** argv)
+{
+	/* Print every token scanned from stdin, stopping at the first
+	 * TK_EOF or TK_ERR. */
+	Scanner ss;
+	scan_init( &ss, stdin );
+
+	for ( ;; ) {
+		const int tok = scan( &ss );
+
+		/* Both terminal tokens end the program with status 0. */
+		if ( tok == TK_EOF ) {
+			printf( "parser: EOF\n" );
+			return 0;
+		}
+		if ( tok == TK_ERR ) {
+			printf( "parser: ERR\n" );
+			return 0;
+		}
+
+		/* Ordinary token: numeric id, then its text in quotes. */
+		printf( "parser: %d \"", tok );
+		fwrite( ss.data, 1, ss.len, stdout );
+		printf( "\"\n" );
+	}
+}
+
+
diff --git a/examples/rlscan.rl b/examples/rlscan.rl
new file mode 100644
index 00000000..d4d4bf97
--- /dev/null
+++ b/examples/rlscan.rl
@@ -0,0 +1,300 @@
+/*
+ * Lexes Ragel input files.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+void escapeXML( char *data )
+{
+	/* Write the NUL-terminated string to cout, replacing <, > and &
+	 * with their XML entities. */
+	for ( const char *cur = data; *cur != 0; ++cur ) {
+		const char ch = *cur;
+		if ( ch == '<' ) cout << "&lt;";
+		else if ( ch == '>' ) cout << "&gt;";
+		else if ( ch == '&' ) cout << "&amp;";
+		else cout << ch;
+	}
+}
+
+void escapeXML( char c )
+{
+	/* Write one character to cout, as an XML entity if it is <, > or &. */
+	const char *entity = c == '<' ? "&lt;" : c == '>' ? "&gt;" : c == '&' ? "&amp;" : 0;
+	if ( entity != 0 )
+		cout << entity;
+	else
+		cout << c;
+}
+
+void escapeXML( char *data, int len )
+{
+	/* Escape exactly len bytes starting at data.  The bytes need not be
+	 * NUL-terminated; each one is handed to the single-character
+	 * overload of escapeXML. */
+	char *const stop = data + len;
+	while ( data != stop ) {
+		escapeXML( *data );
+		++data;
+	}
+}
+
+inline void write( const char *data )	/* send a NUL-terminated string to cout, unescaped */
+{
+	cout << data;
+}
+
+inline void write( char c )	/* send a single character to cout, unescaped */
+{
+	cout << c;
+}
+
+inline void write( char *data, int len )	/* send exactly len bytes to cout, unescaped */
+{
+	cout.write( data, len );
+}
+
+
+%%{
+	machine RagelScan;
+
+	word = [a-zA-Z_][a-zA-Z_0-9]*;
+	integer = [0-9]+;
+	hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*;
+
+	default = ^0;
+	EOF = 0;
+
+	# Handles comments in outside code and inline blocks.
+	c_comment := 
+		( default* :>> '*/' )
+		${ escapeXML( fc ); }
+		@{ fret; };
+
+	action emit {
+		escapeXML( ts, te-ts );
+	}
+
+	#
+	# Inline action code
+	#
+
+	ilscan := |*
+
+		"'" ( [^'\\] | /\\./ )* "'" => emit;
+		'"' ( [^"\\] | /\\./ )* '"' => emit;
+		'/*' {
+			write( "/*" );
+			fcall c_comment;
+		};
+		'//' [^\n]* '\n' => emit;
+
+		'{' {
+			write( '{' );
+			inline_depth += 1; 
+		};
+
+		'}' {
+			write( '}' );
+			/* If dropping down to the last } then return 
+			 * to ragel code. */
+			if ( --inline_depth == 0 ) {
+				write( "</inline>\n" );
+				fgoto rlscan;
+			}
+		};
+
+		default => { escapeXML( *ts ); };
+	*|;
+
+	#
+	# Ragel Tokens
+	#
+
+	rlscan := |*
+		'}%%' {
+			if ( !single_line ) {
+				write( "</section>\n" );
+				fgoto main;
+			}
+		};
+
+		'\n' {
+			if ( single_line ) {
+				write( "</section>\n" );
+				fgoto main;
+			}
+		};
+
+		# Word
+		word {
+			write( "<word>" );
+			write( ts, te-ts );
+			write( "</word>\n" );
+		};
+
+		# Decimal integer.
+		integer {
+			write( "<int>" );
+			write( ts, te-ts );
+			write( "</int>\n" );
+		};
+
+		# Hexadecimal integer.
+		hex {
+			write( "<hex>" );
+			write( ts, te-ts );
+			write( "</hex>\n" );
+		};
+
+		# Consume comments.
+		'#' [^\n]* '\n';
+
+		# Single literal string.
+		"'" ( [^'\\] | /\\./ )* "'" {
+			write( "<single_lit>" );
+			escapeXML( ts, te-ts );
+			write( "</single_lit>\n" );
+		};
+
+		# Double literal string.
+		'"' ( [^"\\] | /\\./ )* '"' {
+			write( "<double_lit>" );
+			escapeXML( ts, te-ts );
+			write( "</double_lit>\n" );
+		};
+
+		# Or literal.
+		'[' ( [^\]\\] | /\\./ )* ']' {
+			write( "<or_lit>" );
+			escapeXML( ts, te-ts );
+			write( "</or_lit>\n" );
+		};
+
+		# Regex Literal.
+		'/' ( [^/\\] | /\\./ ) * '/' {
+			write( "<re_lit>" );
+			escapeXML( ts, te-ts );
+			write( "</re_lit>\n" );
+		};
+
+		# Open an inline block
+		'{' {
+			inline_depth = 1;
+			write( "<inline>{" );
+			fgoto ilscan;
+		};
+
+		punct {
+			write( "<symbol>" );
+			escapeXML( fc );
+			write( "</symbol>\n" );
+		};
+		
+		default;
+	*|;
+
+	#
+	# Outside code.
+	#
+
+	main := |*
+
+		"'" ( [^'\\] | /\\./ )* "'" => emit;
+		'"' ( [^"\\] | /\\./ )* '"' => emit;
+
+		'/*' {
+			escapeXML( ts, te-ts );
+			fcall c_comment;
+		};
+
+		'//' [^\n]* '\n' => emit;
+
+		'%%{' { 
+			write( "<section>\n" );
+			single_line = false;
+			fgoto rlscan;
+		};
+
+		'%%' {
+			write( "<section>\n" ); 
+			single_line = true; 
+			fgoto rlscan;
+		};
+
+		default { 
+			escapeXML( *ts );
+		};
+
+		# EOF.
+		EOF;
+	*|;
+}%%
+
+%% write data nofinal;
+
+#define BUFSIZE 2048
+
+int main()	/* read stdin in blocks and run the RagelScan machine over each block */
+{
+	std::ios::sync_with_stdio(false);
+
+	int cs, act;	/* machine variables used by the generated code */
+	char *ts, *te;	/* token start / end; ts is 0 when no token is in progress */
+	int stack[1], top;	/* return stack for fcall; one slot */
+
+	static char inbuf[BUFSIZE];
+	bool single_line = false;	/* true inside a "%%" section, false inside "%%{" */
+	int inline_depth = 0;	/* brace nesting inside an inline block */
+
+	%% write init;
+
+	bool done = false;
+	int have = 0;	/* bytes of an unfinished token kept at the start of inbuf */
+	while ( !done ) {
+		/* How much space is in the buffer? */
+		int space = BUFSIZE - have;
+		if ( space == 0 ) {
+			/* Buffer is full. */
+			cerr << "TOKEN TOO BIG" << endl;
+			exit(1);
+		}
+
+		/* Read in a block. */
+		char *p = inbuf + have;	/* new input goes after the kept prefix */
+		cin.read( p, space );
+		int len = cin.gcount();
+		char *pe = p + len;
+		char *eof = 0;
+
+		/* Check for EOF. */
+		if ( len == 0 ) {
+			eof = pe;	/* eof is non-zero only on this final pass */
+			done = true;
+		}
+
+		%% write exec;
+
+		if ( cs == RagelScan_error ) {
+			/* Machine failed before finding a token. */
+			cerr << "PARSE ERROR" << endl;
+			exit(1);
+		}
+
+		if ( ts == 0 )	/* no token in progress, nothing to carry over */
+			have = 0;
+		else {
+			/* There is a prefix to preserve, shift it over. */
+			have = pe - ts;
+			memmove( inbuf, ts, have );
+			te = inbuf + (te-ts);	/* keep te at the same offset from ts */
+			ts = inbuf;
+		}
+	}
+	return 0;
+}
diff --git a/examples/statechart.rl b/examples/statechart.rl
new file mode 100644
index 00000000..a04471b5
--- /dev/null
+++ b/examples/statechart.rl
@@ -0,0 +1,116 @@
+/*
+ * Demonstrate the use of labels, the epsilon operator, and the join operator
+ * for creating machines using the named state and transition list paradigm.
+ * This implements the same machine as the atoi example.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+struct StateChart	/* parser state for the atoi-style machine plus its driver methods */
+{
+	bool neg;	/* set when a '-' sign is seen */
+	int val;	/* value built from the digits; negated at the end of a number if neg */
+	int cs;		/* current machine state */
+
+	int init( );	/* reset neg, val and the machine; returns 1 */
+	int execute( const char *data, int len );	/* run over a chunk; -1 error, 1 final state, 0 otherwise */
+	int finish( );	/* report the current state with the same -1 / 1 / 0 codes */
+};
+
+%%{
+	machine StateChart;
+
+	action begin {
+		neg = false;
+		val = 0;
+	}
+
+	action see_neg {
+		neg = true;
+	}
+
+	action add_digit { 
+		val = val * 10 + (fc - '0');
+	}
+
+	action finish {
+		if ( neg )
+			val = -1 * val;
+	}
+
+	atoi = (
+		start: (
+			'-' @see_neg ->om_num | 
+			'+' ->om_num |
+			[0-9] @add_digit ->more_nums
+		),
+
+		# One or more nums.
+		om_num: (
+			[0-9] @add_digit ->more_nums
+		),
+
+		# Zero or more nums.
+		more_nums: (
+			[0-9] @add_digit ->more_nums |
+			'' -> final
+		)
+	) >begin %finish;
+
+	main := ( atoi '\n' @{ cout << val << endl; } )*;
+}%%
+
+%% write data;
+
+int StateChart::init( )	/* reset the parse fields and the machine; always returns 1 */
+{
+	neg = false;
+	val = 0;	/* val is an int: use 0, as the begin action does, rather than false */
+	%% write init;
+	return 1;
+}
+
+int StateChart::execute( const char *data, int len )
+{
+	/* Ragel's generated code reads the cursor `p` and the end pointer `pe`. */
+	const char *p = data;
+	const char *pe = p + len;
+
+	%% write exec;
+
+	/* -1 = error state, 1 = final state, 0 = anything else. */
+	if ( cs == StateChart_error )
+		return -1;
+	return cs >= StateChart_first_final ? 1 : 0;
+}
+
+int StateChart::finish( )
+{
+	/* -1 = error state, 1 = final state, 0 = stopped in a non-final state. */
+	if ( cs == StateChart_error )
+		return -1;
+	const bool accepted = cs >= StateChart_first_final;
+	return accepted ? 1 : 0;
+}
+
+
+#define BUFSIZE 1024
+
+int main()
+{
+	/* Feed stdin to the machine one fgets() chunk at a time. */
+	StateChart atoi;
+	char buf[BUFSIZE];
+
+	atoi.init();
+	while ( fgets( buf, sizeof(buf), stdin ) )
+		atoi.execute( buf, strlen(buf) );
+	if ( atoi.finish() <= 0 )
+		cerr << "statechart: error: parsing input" << endl;
+	return 0;
+}
diff --git a/examples/uri.rl b/examples/uri.rl
new file mode 100644
index 00000000..185a76c6
--- /dev/null
+++ b/examples/uri.rl
@@ -0,0 +1,31 @@
+%%{
+	machine uri;
+
+	action scheme {}
+	action loc {}
+	action item {}
+	action query {}
+	action last {}
+	action nothing {}
+
+	main :=
+		# Scheme machine. This is ambiguous with the item machine. We commit
+		# to the scheme machine on colon.
+		( [^:/?#]+ ':' @(colon,1) @scheme )?
+
+		# Location machine. This is ambiguous with the item machine. We remain
+		# ambiguous until a second slash, at that point and all points after
+		# we place a higher priority on staying in the location machine over
+		# moving into the item machine.
+		( ( '/' ( '/' [^/?#]* ) $(loc,1) ) %loc %/loc )? 
+
+		# Item machine. Ambiguous with both scheme and location, which both
+		# get a higher priority on the characters causing ambiguity.
+		( ( [^?#]+ ) $(loc,0) $(colon,0) %item %/item )? 
+
+		# Last two components, the characters that initiate these machines are
+		# not supported in any previous components, therefore there are no
+		# ambiguities introduced by these parts.
+		( '?' [^#]* %query %/query)?
+		( '#' any* %/last )?;
+}%%
author	Adrian Thurston <thurston@colm.net>	2019-09-11 18:22:31 -0600
committer	Adrian Thurston <thurston@colm.net>	2019-09-11 18:22:31 -0600
commit	e4f23077edf61818128b355f2aab2b900702ea97 (patch)
tree	05d3294062b259a3d72e277950e1364c50bbea07 /examples
parent	bccaa853593339c2bac8ddede25f18e1afc91597 (diff)
download	colm-e4f23077edf61818128b355f2aab2b900702ea97.tar.gz