summaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@complang.org>2007-01-21 22:58:22 +0000
committerAdrian Thurston <thurston@complang.org>2007-01-21 22:58:22 +0000
commit86214ecf32c2bafd549952c93eab006d20327736 (patch)
tree9b5449ef42e829f98bf7a6c6e0554b88d4ab9132 /examples
downloadragel-86214ecf32c2bafd549952c93eab006d20327736.tar.gz
Import from my private repository. Snapshot after version 5.16, immediately
following the rewrite of the parsers. Repository revision number 3961.
Diffstat (limited to 'examples')
-rw-r--r--examples/Makefile37
-rw-r--r--examples/README40
-rw-r--r--examples/atoi/Makefile21
-rw-r--r--examples/atoi/atoi.rl60
-rw-r--r--examples/awkemu/Makefile21
-rw-r--r--examples/awkemu/awkemu.rl116
-rwxr-xr-xexamples/awkemu/awkequiv.awk10
-rw-r--r--examples/clang/Makefile21
-rw-r--r--examples/clang/clang.rl150
-rw-r--r--examples/concurrent/Makefile21
-rw-r--r--examples/concurrent/concurrent.rl126
-rw-r--r--examples/cppscan/Makefile41
-rw-r--r--examples/cppscan/cppscan.lex143
-rw-r--r--examples/cppscan/cppscan.rec183
-rw-r--r--examples/cppscan/cppscan.rl207
-rw-r--r--examples/format/Makefile21
-rw-r--r--examples/format/format.rl191
-rw-r--r--examples/gotocallret/Makefile21
-rw-r--r--examples/gotocallret/gotocallret.rl103
-rw-r--r--examples/mailbox/Makefile16
-rw-r--r--examples/mailbox/mailbox.rl206
-rw-r--r--examples/params/Makefile21
-rw-r--r--examples/params/params.rl104
-rw-r--r--examples/pullscan/Makefile23
-rw-r--r--examples/pullscan/pullscan.rl166
-rw-r--r--examples/rlscan/Makefile21
-rw-r--r--examples/rlscan/rlscan.rl298
-rw-r--r--examples/statechart/Makefile21
-rw-r--r--examples/statechart/statechart.rl114
-rw-r--r--examples/uri/uri.rl31
30 files changed, 2554 insertions, 0 deletions
diff --git a/examples/Makefile b/examples/Makefile
new file mode 100644
index 00000000..e1e78089
--- /dev/null
+++ b/examples/Makefile
@@ -0,0 +1,37 @@
+#
+# Copyright 2002-2003 Adrian Thurston <thurston@cs.queensu.ca>
+#
+
+# This file is part of Ragel.
+#
+# Ragel is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Ragel is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ragel; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+SUBDIRS = \
+	atoi awkemu clang concurrent format gotocallret mailbox params rlscan \
+	statechart cppscan
+
+# None of these targets name a real file.
+.PHONY: all ps clean distclean
+
+# Every target runs the same-named target in each example directory and stops
+# at the first failure. $(MAKE) -C is used rather than "cd dir; make; cd .."
+# so that a directory that cannot be entered fails the loop instead of running
+# make in this directory again.
+all:
+	@for dir in $(SUBDIRS); do $(MAKE) -C $$dir || exit 1; done
+
+ps:
+	@for dir in $(SUBDIRS); do $(MAKE) -C $$dir ps || exit 1; done
+
+clean:
+	@for dir in $(SUBDIRS); do $(MAKE) -C $$dir clean || exit 1; done
+
+distclean:
+	@for dir in $(SUBDIRS); do $(MAKE) -C $$dir distclean || exit 1; done
+
diff --git a/examples/README b/examples/README
new file mode 100644
index 00000000..12773cb3
--- /dev/null
+++ b/examples/README
@@ -0,0 +1,40 @@
+
+ Ragel State Machine Compiler -- Examples
+ ========================================
+
+atoi -- Converts a string to an integer.
+
+awkemu -- Performs the basic parsing that the awk program performs on input.
+ The awk equivalent to awkemu is in awkemu/awkequiv.awk
+
+clang -- A scanner for a simple C like language. It breaks input up into
+ words, numbers, strings and symbols and strips out whitespace
+ and comments. It is a suitable template for writing a parser
+ that finds a sequence of tokens.
+
+concurrent -- Demonstrates the ability of ragel to produce parsers that
+ perform independent tasks concurrently.
+
+cppscan -- A C++ scanner that uses the longest match scanning method. This
+ example differs from other examples of scanning. Each run of the
+ state machine matches one token. This method results in a
+ smaller state machine since the final kleene star is omitted and
+ therefore every state does not need to get all the transitions
+ of the start state.
+
+format -- Partial printf implementation.
+
+gotocallret -- Demonstrate the use of fgoto, fcall and fret.
+
+mailbox -- Parses unix mailbox files. It breaks files into messages, and
+ messages into headers and body. It demonstrates Ragel's ability
+ to make parsers for structured file formats.
+
+params -- Parses command line arguments.
+
+rlscan -- Lexes Ragel input files.
+
+statechart -- Demonstrate the use of labels, the epsilon operator, and the
+ join operator for creating machines using the named state and
+ transition list paradigm. This implements the same machine as
+ the atoi example.
diff --git a/examples/atoi/Makefile b/examples/atoi/Makefile
new file mode 100644
index 00000000..901de19a
--- /dev/null
+++ b/examples/atoi/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: atoi
+
+ps: atoi.ps
+
+atoi: atoi.o
+ g++ -g -o atoi atoi.o
+
+atoi.cpp: atoi.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) atoi.rl | $(RLCODEGEN) -G2 -o atoi.cpp
+
+atoi.o: atoi.cpp
+ g++ -Wall -g -c -O3 -o $@ $<
+
+atoi.ps: atoi.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) atoi.rl | $(RLCODEGEN) -V | dot -Tps > atoi.ps
+
+distclean clean:
+ rm -Rf *.o atoi.cpp atoi atoi.ps
diff --git a/examples/atoi/atoi.rl b/examples/atoi/atoi.rl
new file mode 100644
index 00000000..0d354a00
--- /dev/null
+++ b/examples/atoi/atoi.rl
@@ -0,0 +1,60 @@
+/*
+ * Convert a string to an integer.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace std;
+
+%%{
+ machine atoi;
+ write data noerror;
+}%%
+
+/*
+ * Convert the text at str to an int.
+ *
+ * The embedded machine accepts an optional '-' or '+', one or more digits and
+ * a terminating newline; the action on the newline breaks out of the machine.
+ * If the machine does not end in a final state a message is written to cerr.
+ * The value accumulated so far is returned either way.
+ */
+int atoi( char *str )
+{
+	char *p = str;
+	int cs, val = 0;
+	bool neg = false;
+
+	%%{
+		action see_neg {
+			neg = true;
+		}
+
+		action add_digit {
+			val = val * 10 + (fc - '0');
+		}
+
+		main :=
+			( '-'@see_neg | '+' )? ( digit @add_digit )+
+			'\n' @{ fbreak; };
+
+		# Initialize and execute.
+		write init;
+		write exec noend;
+	}%%
+
+	if ( neg )
+		val = -val;
+
+	if ( cs < atoi_first_final )
+		cerr << "atoi: there was an error" << endl;
+
+	return val;
+}
+
+
+#define BUFSIZE 1024
+
+/*
+ * Read stdin one line at a time and print the integer parsed from each line.
+ */
+int main()
+{
+	char line[BUFSIZE];
+	while ( fgets( line, sizeof(line), stdin ) != 0 )
+		cout << atoi( line ) << endl;
+	return 0;
+}
diff --git a/examples/awkemu/Makefile b/examples/awkemu/Makefile
new file mode 100644
index 00000000..5e6ecde4
--- /dev/null
+++ b/examples/awkemu/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: awkemu
+
+ps: awkemu.ps
+
+awkemu: awkemu.o
+ gcc -g -o awkemu awkemu.o
+
+awkemu.c: awkemu.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) awkemu.rl | $(RLCODEGEN) -G2 -o awkemu.c
+
+awkemu.ps: awkemu.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) awkemu.rl | $(RLCODEGEN) -V | dot -Tps > awkemu.ps
+
+%.o: %.c
+ gcc -pedantic -Wall -g -c -O3 -o $@ $<
+
+distclean clean:
+ rm -Rf *.o awkemu.c awkemu awkemu.ps
diff --git a/examples/awkemu/awkemu.rl b/examples/awkemu/awkemu.rl
new file mode 100644
index 00000000..6615943d
--- /dev/null
+++ b/examples/awkemu/awkemu.rl
@@ -0,0 +1,116 @@
+/*
+ * Perform the basic line parsing of input performed by awk.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+%%{
+	machine awkemu;
+
+	action start_word {
+		/* ws and we hold MAXWORDS entries each; stop rather than write
+		 * past the end of them on a line with more words than that. */
+		if ( nwords >= MAXWORDS ) {
+			fprintf( stderr, "too many words on line\n" );
+			exit(1);
+		}
+		ws[nwords] = fpc;
+	}
+
+	action end_word {
+		we[nwords++] = fpc;
+	}
+
+	action start_line {
+		nwords = 0;
+		ls = fpc;
+	}
+
+	action end_line {
+		printf("endline(%i): ", nwords );
+		fwrite( ls, 1, p - ls, stdout );
+		printf("\n");
+
+		for ( i = 0; i < nwords; i++ ) {
+			printf(" word: ");
+			fwrite( ws[i], 1, we[i] - ws[i], stdout );
+			printf("\n");
+		}
+	}
+
+	# Words in a line.
+	word = ^[ \t\n]+;
+
+	# The whitespace separating words in a line.
+	whitespace = [ \t];
+
+	# The components in a line to break up. Either a word or a single char of
+	# whitespace. On the word capture characters.
+	blineElements = word >start_word %end_word | whitespace;
+
+	# Star the break line elements. Just be careful to decrement the leaving
+	# priority as we don't want multiple character identifiers to be treated as
+	# multiple single char identifiers.
+	line = ( blineElements** '\n' ) >start_line @end_line;
+
+	# Any number of lines.
+	main := line*;
+}%%
+
+%% write data noerror nofinal;
+
+#define MAXWORDS 256
+#define BUFSIZE 4096
+char buf[BUFSIZE];
+
+/*
+ * Read stdin in chunks and run the awkemu machine over every complete line.
+ * Bytes that follow the last newline of a chunk are moved to the front of buf
+ * and scanned again once the rest of their line has been read. Exits with
+ * status 1 when a single line does not fit in buf.
+ */
+int main()
+{
+	int i, nwords = 0;
+	char *ls = 0;
+	char *ws[MAXWORDS];
+	char *we[MAXWORDS];
+
+	int cs;
+	int have = 0;
+
+	%% write init;
+
+	while ( 1 ) {
+		char *p, *pe, *data = buf + have;
+		int len, space = BUFSIZE - have;
+		/* fprintf( stderr, "space: %i\n", space ); */
+
+		if ( space == 0 ) {
+			fprintf(stderr, "buffer out of space\n");
+			exit(1);
+		}
+
+		len = fread( data, 1, space, stdin );
+		/* fprintf( stderr, "len: %i\n", len ); */
+		if ( len == 0 )
+			break;
+
+		/* Find the last newline by searching backwards. This is where
+		 * we will stop processing on this iteration. pe ends up one past
+		 * that newline, or at buf when there is none. The bound is tested
+		 * before the byte is read so nothing in front of buf is touched. */
+		p = buf;
+		pe = buf + have + len;
+		while ( pe > buf && pe[-1] != '\n' )
+			pe--;
+
+		/* fprintf( stderr, "running on: %i\n", pe - p ); */
+
+		%% write exec;
+
+		/* How much is still in the buffer. */
+		have = data + len - pe;
+		if ( have > 0 )
+			memmove( buf, pe, have );
+
+		/* fprintf(stderr, "have: %i\n", have ); */
+
+		if ( len < space )
+			break;
+	}
+
+	if ( have > 0 )
+		fprintf(stderr, "input not newline terminated\n");
+	return 0;
+}
diff --git a/examples/awkemu/awkequiv.awk b/examples/awkemu/awkequiv.awk
new file mode 100755
index 00000000..9877dd36
--- /dev/null
+++ b/examples/awkemu/awkequiv.awk
@@ -0,0 +1,10 @@
+#!/usr/bin/awk -f
+#
+
+
+# For every input record print the number of fields and the record itself,
+# then each field on a line of its own.
+{
+ print "endline(" NF "): " $0
+ for ( i = 1; i <= NF; i++ ) {
+ print " word: " $i
+ }
+}
diff --git a/examples/clang/Makefile b/examples/clang/Makefile
new file mode 100644
index 00000000..d3054060
--- /dev/null
+++ b/examples/clang/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: clang
+
+ps: clang.ps
+
+clang: clang.o
+ gcc -g -o clang clang.o
+
+clang.c: clang.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) clang.rl | $(RLCODEGEN) -G2 -o clang.c
+
+clang.ps: clang.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) clang.rl | $(RLCODEGEN) -V | dot -Tps > clang.ps
+
+%.o: %.c
+ gcc -pedantic -Wall -O3 -g -c -o $@ $<
+
+distclean clean:
+ rm -Rf *.o clang.c clang clang.ps
diff --git a/examples/clang/clang.rl b/examples/clang/clang.rl
new file mode 100644
index 00000000..7ecfeefd
--- /dev/null
+++ b/examples/clang/clang.rl
@@ -0,0 +1,150 @@
+/*
+ * A mini C-like language scanner.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+%%{
+ machine clang;
+
+ newline = '\n' @{curline += 1;};
+ any_count_line = any | newline;
+
+ # Consume a C comment.
+ c_comment := any_count_line* :>> '*/' @{fgoto main;};
+
+ main := |*
+
+ # Alphanumeric characters or underscore.
+ alnum_u = alnum | '_';
+
+ # Alphabetic characters or underscore.
+ alpha_u = alpha | '_';
+
+ # Symbols. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving dump the symbol.
+ ( punct - [_'"] ) {
+ printf( "symbol(%i): %c\n", curline, tokstart[0] );
+ };
+
+ # Identifier. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving, dump the identifier.
+ alpha_u alnum_u* {
+ printf( "ident(%i): ", curline );
+ fwrite( tokstart, 1, tokend-tokstart, stdout );
+ printf("\n");
+ };
+
+ # Single Quote.
+ sliteralChar = [^'\\] | newline | ( '\\' . any_count_line );
+ '\'' . sliteralChar* . '\'' {
+ printf( "single_lit(%i): ", curline );
+ fwrite( tokstart, 1, tokend-tokstart, stdout );
+ printf("\n");
+ };
+
+ # Double Quote.
+ dliteralChar = [^"\\] | newline | ( '\\' any_count_line );
+ '"' . dliteralChar* . '"' {
+ printf( "double_lit(%i): ", curline );
+ fwrite( tokstart, 1, tokend-tokstart, stdout );
+ printf("\n");
+ };
+
+ # Whitespace is standard ws, newlines and control codes.
+ any_count_line - 0x21..0x7e;
+
+ # Describe both c style comments and c++ style comments. The
+ # priority bump on the terminator of the comments brings us
+ # out of the extend* which matches everything.
+ '//' [^\n]* newline;
+
+ '/*' { fgoto c_comment; };
+
+ # Match an integer. We don't bother clearing the buf or filling it.
+ # The float machine overlaps with int and it will do it.
+ digit+ {
+ printf( "int(%i): ", curline );
+ fwrite( tokstart, 1, tokend-tokstart, stdout );
+ printf("\n");
+ };
+
+ # Match a float. Upon entering the machine clear the buf, buffer
+ # characters on every trans and dump the float upon leaving.
+ digit+ '.' digit+ {
+ printf( "float(%i): ", curline );
+ fwrite( tokstart, 1, tokend-tokstart, stdout );
+ printf("\n");
+ };
+
+ # Match a hex. Upon entering the hex part, clear the buf, buffer characters
+ # on every trans and dump the hex on leaving transitions.
+ '0x' xdigit+ {
+ printf( "hex(%i): ", curline );
+ fwrite( tokstart, 1, tokend-tokstart, stdout );
+ printf("\n");
+ };
+
+ *|;
+}%%
+
+%% write data nofinal;
+
+#define BUFSIZE 128
+
+/*
+ * Scan stdin with the clang machine.
+ *
+ * Input is read into a static buffer of BUFSIZE bytes. When a read returns
+ * fewer bytes than there was room for, a 0 byte is appended and the loop ends
+ * after that chunk. If the machine lands in clang_error, "PARSE ERROR" is
+ * written to stderr and scanning stops. If tokstart is non-zero after a chunk,
+ * the bytes from tokstart onward are moved to the front of the buffer and the
+ * next read is placed after them.
+ */
+void scanner()
+{
+ static char buf[BUFSIZE];
+ int cs, act, have = 0, curline = 1;
+ char *tokstart, *tokend = 0;
+ int done = 0;
+
+ %% write init;
+
+ while ( !done ) {
+ /* New data goes in after the have bytes kept from the last pass. */
+ char *p = buf + have, *pe;
+ int len, space = BUFSIZE - have;
+
+ if ( space == 0 ) {
+ /* We've used up the entire buffer storing an already-parsed token
+ * prefix that must be preserved. */
+ fprintf(stderr, "OUT OF BUFFER SPACE\n" );
+ exit(1);
+ }
+
+ len = fread( p, 1, space, stdin );
+
+ /* If this is the last buffer, tack on an EOF. */
+ if ( len < space ) {
+ p[len++] = 0;
+ done = 1;
+ }
+
+ pe = p + len;
+ %% write exec;
+
+ if ( cs == clang_error ) {
+ fprintf(stderr, "PARSE ERROR\n" );
+ break;
+ }
+
+ if ( tokstart == 0 )
+ have = 0;
+ else {
+ /* There is a prefix to preserve, shift it over. */
+ have = pe - tokstart;
+ memmove( buf, tokstart, have );
+ /* Re-base both token pointers onto the moved data. */
+ tokend = buf + (tokend-tokstart);
+ tokstart = buf;
+ }
+ }
+}
+
+int main()
+{
+ scanner();
+ return 0;
+}
+
diff --git a/examples/concurrent/Makefile b/examples/concurrent/Makefile
new file mode 100644
index 00000000..b9a09f6e
--- /dev/null
+++ b/examples/concurrent/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: concurrent
+
+ps: concurrent.ps
+
+concurrent: concurrent.o
+ g++ -g -o concurrent concurrent.o
+
+concurrent.cpp: concurrent.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) concurrent.rl | $(RLCODEGEN) -G2 -o concurrent.cpp
+
+concurrent.ps: concurrent.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) concurrent.rl | $(RLCODEGEN) -V | dot -Tps > concurrent.ps
+
+%.o: %.cpp
+ g++ -Wall -g -c -O3 -o $@ $<
+
+distclean clean:
+ rm -Rf *.o concurrent.cpp concurrent concurrent.ps
diff --git a/examples/concurrent/concurrent.rl b/examples/concurrent/concurrent.rl
new file mode 100644
index 00000000..b70fd5df
--- /dev/null
+++ b/examples/concurrent/concurrent.rl
@@ -0,0 +1,126 @@
+/*
+ * Show off concurrent abilities.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace std;
+
+#define BUFSIZE 2048
+
+/*
+ * State for the Concurrent machine: a running character count plus the
+ * count recorded where the current word, comment and literal began.
+ */
+struct Concurrent
+{
+ /* Incremented by the next_char action; set to 0 by init(). */
+ int cur_char;
+ /* Value of cur_char saved by the start_word action. */
+ int start_word;
+ /* Value of cur_char saved by the start_comment action. */
+ int start_comment;
+ /* Value of cur_char saved by the start_literal action. */
+ int start_literal;
+
+ /* Compared with Concurrent_error and Concurrent_first_final by
+ * execute() and finish(). */
+ int cs;
+
+ /* Returns 1. */
+ int init( );
+ /* Returns -1 on error, 1 in a final state, 0 otherwise. */
+ int execute( const char *data, int len );
+ /* Returns -1 on error, 1 in a final state, 0 otherwise. */
+ int finish( );
+};
+
+%%{
+ machine Concurrent;
+
+ action next_char {
+ cur_char += 1;
+ }
+
+ action start_word {
+ start_word = cur_char;
+ }
+ action end_word {
+ cout << "word: " << start_word <<
+ " " << cur_char-1 << endl;
+ }
+
+ action start_comment {
+ start_comment = cur_char;
+ }
+ action end_comment {
+ cout << "comment: " << start_comment <<
+ " " << cur_char-1 << endl;
+ }
+
+ action start_literal {
+ start_literal = cur_char;
+ }
+ action end_literal {
+ cout << "literal: " << start_literal <<
+ " " << cur_char-1 << endl;
+ }
+
+ # Count characters.
+ chars = ( any @next_char )*;
+
+ # Words are non-whitespace.
+ word = ( any-space )+ >start_word %end_word;
+ words = ( ( word | space ) $1 %0 )*;
+
+ # Finds C style comments.
+ comment = ( '/*' any* :>> '*/' ) >start_comment %end_comment;
+ comments = ( comment | any )**;
+
+ # Finds single quoted strings.
+ literalChar = ( any - ['\\] ) | ( '\\' . any );
+ literal = ('\'' literalChar* '\'' ) >start_literal %end_literal;
+ literals = ( ( literal | (any-'\'') ) $1 %0 )*;
+
+ main := chars | words | comments | literals;
+}%%
+
+%% write data;
+
+/*
+ * Emit the machine's init code and zero the character counter.
+ * Always returns 1.
+ */
+int Concurrent::init( )
+{
+ %% write init;
+ cur_char = 0;
+ return 1;
+}
+
+/*
+ * Run the machine over the len bytes starting at data.
+ * Returns -1 if the machine is in the error state afterwards, 1 if it is in
+ * a final state and 0 otherwise.
+ */
+int Concurrent::execute( const char *data, int len )
+{
+	const char *p = data;
+	const char *pe = data + len;
+
+	%% write exec;
+
+	if ( cs == Concurrent_error )
+		return -1;
+	return ( cs >= Concurrent_first_final ) ? 1 : 0;
+}
+
+/*
+ * Emit the machine's eof code and report the resulting state.
+ * Returns -1 for the error state, 1 for a final state and 0 otherwise.
+ */
+int Concurrent::finish( )
+{
+	%% write eof;
+
+	int status = 0;
+	if ( cs == Concurrent_error )
+		status = -1;
+	else if ( cs >= Concurrent_first_final )
+		status = 1;
+	return status;
+}
+
+Concurrent concurrent;
+char buf[BUFSIZE];
+
+/*
+ * Feed stdin to the global machine BUFSIZE bytes at a time. A short read
+ * ends the loop; finish() is then called and a message is written to cerr
+ * unless it returns a positive value.
+ */
+int main()
+{
+	concurrent.init();
+
+	int len;
+	do {
+		len = fread( buf, 1, BUFSIZE, stdin );
+		concurrent.execute( buf, len );
+	} while ( len == BUFSIZE );
+
+	if ( concurrent.finish() <= 0 )
+		cerr << "concurrent: error parsing input" << endl;
+	return 0;
+}
diff --git a/examples/cppscan/Makefile b/examples/cppscan/Makefile
new file mode 100644
index 00000000..6a92c82a
--- /dev/null
+++ b/examples/cppscan/Makefile
@@ -0,0 +1,41 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+FLEX = flex
+RE2C = re2c
+
+CFLAGS = -Wall -g -O3
+
+all: cppscan lex-cppscan re2c-cppscan
+
+ps: cppscan.ps
+
+cppscan: cppscan.o
+ g++ -g -o $@ $<
+
+lex-cppscan: lex-cppscan.o
+ g++ -g -o $@ $<
+
+re2c-cppscan: re2c-cppscan.o
+ g++ -g -o $@ $<
+
+cppscan.cpp: cppscan.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) cppscan.rl | $(RLCODEGEN) -G2 -o $@
+
+lex-cppscan.cpp: cppscan.lex
+ $(FLEX) -f -o $@ $<
+
+re2c-cppscan.cpp: cppscan.rec
+ $(RE2C) -s $< > $@
+
+example.cpp: example.rec
+ $(RE2C) -s $< > $@
+
+%.o: %.cpp
+ g++ $(CFLAGS) -c -o $@ $<
+
+cppscan.ps: cppscan.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) cppscan.rl | $(RLCODEGEN) -V | dot -Tps > cppscan.ps
+
+distclean clean:
+ rm -Rf *.o cppscan.cpp cppscan cppscan.ps \
+ lex-cppscan lex-cppscan.cpp re2c-cppscan re2c-cppscan.cpp
diff --git a/examples/cppscan/cppscan.lex b/examples/cppscan/cppscan.lex
new file mode 100644
index 00000000..fb662538
--- /dev/null
+++ b/examples/cppscan/cppscan.lex
@@ -0,0 +1,143 @@
+/*
+ * flex equivalent to cppscan.rl
+ */
+
+%{
+
+#include <stdio.h>
+
+#define TK_Dlit 256
+#define TK_Slit 257
+#define TK_Float 258
+#define TK_Id 259
+#define TK_NameSep 260
+#define TK_Arrow 261
+#define TK_PlusPlus 262
+#define TK_MinusMinus 263
+#define TK_ArrowStar 264
+#define TK_DotStar 265
+#define TK_ShiftLeft 266
+#define TK_ShiftRight 267
+#define TK_IntegerDecimal 268
+#define TK_IntegerOctal 269
+#define TK_IntegerHex 270
+#define TK_EqualsEquals 271
+#define TK_NotEquals 272
+#define TK_AndAnd 273
+#define TK_OrOr 274
+#define TK_MultAssign 275
+#define TK_DivAssign 276
+#define TK_PercentAssign 277
+#define TK_PlusAssign 278
+#define TK_MinusAssign 279
+#define TK_AmpAssign 280
+#define TK_CaretAssign 281
+#define TK_BarAssign 282
+#define TK_DotDotDot 283
+#define TK_Whitespace 284
+#define TK_Comment 285
+
+int line = 1, col = 1;
+
+/*
+ * Print "<tok> " followed by the len bytes at data and a newline, then
+ * advance the global line and col counters over the same bytes.
+ */
+void token( int tok, char *data, int len )
+{
+	const char *end = data + len;
+
+	printf( "<%i> ", tok );
+	for ( const char *cur = data; cur < end; ++cur )
+		putchar( *cur );
+	putchar( '\n' );
+
+	/* Count newlines and columns. This pass is kept apart from the output
+	 * above so that it still gives the routine some work when the output
+	 * is commented out during performance testing. */
+	for ( const char *cur = data; cur < end; ++cur ) {
+		if ( *cur != '\n' ) {
+			col += 1;
+			continue;
+		}
+		line += 1;
+		col = 1;
+	}
+}
+
+
+%}
+
+%x COMMENT
+
+FRACT_CONST [0-9]*\.[0-9]+|[0-9]+\.
+EXPONENT [eE][+\-]?[0-9]+
+FLOAT_SUFFIX [flFL]
+
+%%
+
+ /* Single and double literals. */
+L?\'([^\'\\\n]|\\.)*\' {
+ token( TK_Slit, yytext, yyleng );
+}
+
+L?\"([^\"\\\n]|\\.)*\" {
+ token( TK_Dlit, yytext, yyleng );
+}
+
+[a-zA-Z_][a-zA-Z0-9_]* {
+ token( TK_Id, yytext, yyleng );
+}
+
+{FRACT_CONST}{EXPONENT}?{FLOAT_SUFFIX}?|[0-9]+{EXPONENT}{FLOAT_SUFFIX}? {
+ token( TK_Float, yytext, yyleng );
+}
+
+(0|[1-9][0-9]*)[ulUL]{0,3} {
+ token( TK_IntegerDecimal, yytext, yyleng );
+}
+
+0[0-9]+[ulUL]{0,2} {
+ token( TK_IntegerOctal, yytext, yyleng );
+}
+
+0x[0-9a-fA-F]+[ulUL]{0,2} {
+ token( TK_IntegerHex, yytext, yyleng );
+}
+
+:: token( TK_NameSep, yytext, yyleng );
+== token( TK_EqualsEquals, yytext, yyleng );
+!= token( TK_NotEquals, yytext, yyleng );
+&& token( TK_AndAnd, yytext, yyleng );
+\|\| token( TK_OrOr, yytext, yyleng );
+\*= token( TK_MultAssign, yytext, yyleng );
+\/= token( TK_DivAssign, yytext, yyleng );
+%= token( TK_PercentAssign, yytext, yyleng );
+\+= token( TK_PlusAssign, yytext, yyleng );
+-= token( TK_MinusAssign, yytext, yyleng );
+&= token( TK_AmpAssign, yytext, yyleng );
+^= token( TK_CaretAssign, yytext, yyleng );
+\|= token( TK_BarAssign, yytext, yyleng );
+\+\+ token( TK_PlusPlus, yytext, yyleng );
+-- token( TK_MinusMinus, yytext, yyleng );
+-> token( TK_Arrow, yytext, yyleng );
+->\* token( TK_ArrowStar, yytext, yyleng );
+\.\* token( TK_DotStar, yytext, yyleng );
+\.\.\. token( TK_DotDotDot, yytext, yyleng );
+
+\/\* BEGIN(COMMENT);
+<COMMENT>\*\/ BEGIN(INITIAL);
+<COMMENT>(.|\n) { }
+
+\/\/.*\n {}
+[^!-~]+ {}
+
+[!-/:-@\[-`{-~] token( yytext[0], yytext, yyleng );
+
+%%
+
+int yywrap()
+{
+ /* Once the input is done, no more. */
+ return 1;
+}
+
+/*
+ * Run the generated scanner and return 0 explicitly, like the other example
+ * programs, instead of falling off the end of main.
+ */
+int main()
+{
+	yylex();
+	return 0;
+}
diff --git a/examples/cppscan/cppscan.rec b/examples/cppscan/cppscan.rec
new file mode 100644
index 00000000..43f297d8
--- /dev/null
+++ b/examples/cppscan/cppscan.rec
@@ -0,0 +1,183 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define TK_Dlit 256
+#define TK_Slit 257
+#define TK_Float 258
+#define TK_Id 259
+#define TK_NameSep 260
+#define TK_Arrow 261
+#define TK_PlusPlus 262
+#define TK_MinusMinus 263
+#define TK_ArrowStar 264
+#define TK_DotStar 265
+#define TK_ShiftLeft 266
+#define TK_ShiftRight 267
+#define TK_IntegerDecimal 268
+#define TK_IntegerOctal 269
+#define TK_IntegerHex 270
+#define TK_EqualsEquals 271
+#define TK_NotEquals 272
+#define TK_AndAnd 273
+#define TK_OrOr 274
+#define TK_MultAssign 275
+#define TK_DivAssign 276
+#define TK_PercentAssign 277
+#define TK_PlusAssign 278
+#define TK_MinusAssign 279
+#define TK_AmpAssign 280
+#define TK_CaretAssign 281
+#define TK_BarAssign 282
+#define TK_DotDotDot 283
+#define TK_Whitespace 284
+#define TK_Comment 285
+
+int line = 1, col = 1;
+
+/*
+ * Print "<tok> " followed by the len bytes at data and a newline, then
+ * advance the global line and col counters over the same bytes.
+ */
+void token( int tok, char *data, int len )
+{
+	const char *end = data + len;
+
+	printf( "<%i> ", tok );
+	for ( const char *cur = data; cur < end; ++cur )
+		putchar( *cur );
+	putchar( '\n' );
+
+	/* Count newlines and columns. This pass is kept apart from the output
+	 * above so that it still gives the routine some work when the output
+	 * is commented out during performance testing. */
+	for ( const char *cur = data; cur < end; ++cur ) {
+		if ( *cur != '\n' ) {
+			col += 1;
+			continue;
+		}
+		line += 1;
+		col = 1;
+	}
+}
+
+#define BUFSIZE 8192
+char buf[BUFSIZE];
+
+void fill( int n )
+{
+ printf("fill(%i)\n", n);
+ exit(1);
+}
+
+/*
+ * re2c version of the C++ scanner. Tokens are scanned out of buf, which
+ * YYFILL tops up from stdin. Returns 0 when the 0 byte appended after the
+ * last read is scanned.
+ */
+int main()
+{
+	char *start, *p = buf, *lim = buf, *marker = buf;
+	int len, have, want, shift;
+	int done = 0;
+
+#define YYCTYPE char
+
+#define YYCURSOR p
+#define YYLIMIT lim
+#define YYMARKER marker
+
+/* Move the data from start onward to the front of buf, then read more input
+ * after it. One byte is kept free for the 0 appended after a short read. If
+ * there is no room left the token being scanned is larger than the buffer;
+ * that is reported instead of scanning past lim. */
+#define YYFILL(n) { \
+		if ( ! done ) { \
+			have = lim-start; \
+			if ( start > buf ) { \
+				shift = start-buf; \
+				memmove( buf, start, have ); \
+				start -= shift; \
+				p -= shift; \
+				lim -= shift; \
+				marker -= shift; \
+			} \
+			want = BUFSIZE - have - 1; \
+			if ( want == 0 ) { \
+				fprintf( stderr, "OUT OF BUFFER SPACE\n" ); \
+				exit(1); \
+			} \
+			len = fread( lim, 1, want, stdin ); \
+			lim += len; \
+			if ( len < want ) { \
+				*lim++ = 0; \
+				done = 1; \
+			} \
+		} \
+	}
+
+again:
+	/* marker is reset together with start so that YYFILL always adjusts a
+	 * pointer that lies inside the data it keeps. */
+	start = p;
+	marker = p;
+
+/*!re2c
+
+ANY = [\000-\377];
+FRACTCONST = ( [0-9]* "." [0-9]+ ) | [0-9]+ ".";
+EXPONENT = [eE] [+\-]? [0-9]+;
+FLOATSUFFIX = [flFL];
+
+	"L"? "\'" ( ANY \ [\'\\\n] | "\\" ANY )* "\'" {
+		token( TK_Slit, start, p-start );
+		goto again;
+	}
+
+	"L"? "\"" ( ANY \ [\"\\\n] | "\\" ANY )* "\"" {
+		token( TK_Dlit, start, p-start );
+		goto again;
+	}
+
+	[a-zA-Z_][a-zA-Z0-9_]* {
+		token( TK_Id, start, p-start );
+		goto again;
+	}
+
+	( FRACTCONST EXPONENT? FLOATSUFFIX? ) | ( [0-9]+ EXPONENT FLOATSUFFIX? ) {
+		token( TK_Float, start, p-start );
+		goto again;
+	}
+
+
+	( "0" | [1-9][0-9]* ) [ulUL]* {
+		token( TK_IntegerDecimal, start, p-start );
+		goto again;
+	}
+
+	"0" [0-9]+ [ulUL]* {
+		token( TK_IntegerOctal, start, p-start );
+		goto again;
+	}
+
+	"0x" [0-9a-fA-F]+[ulUL]* {
+		token( TK_IntegerHex, start, p-start );
+		goto again;
+	}
+
+	"::" { token( TK_NameSep, start, p-start ); goto again; }
+	"==" { token( TK_EqualsEquals, start, p-start ); goto again; }
+	"!=" { token( TK_NotEquals, start, p-start ); goto again; }
+	"&&" { token( TK_AndAnd, start, p-start ); goto again; }
+	"||" { token( TK_OrOr, start, p-start ); goto again; }
+	"*=" { token( TK_MultAssign, start, p-start ); goto again; }
+	"/=" { token( TK_DivAssign, start, p-start ); goto again; }
+	"%=" { token( TK_PercentAssign, start, p-start ); goto again; }
+	"+=" { token( TK_PlusAssign, start, p-start ); goto again; }
+	"-=" { token( TK_MinusAssign, start, p-start ); goto again; }
+	"&=" { token( TK_AmpAssign, start, p-start ); goto again; }
+	"^=" { token( TK_CaretAssign, start, p-start ); goto again; }
+	"|=" { token( TK_BarAssign, start, p-start ); goto again; }
+	"++" { token( TK_PlusPlus, start, p-start ); goto again; }
+	"--" { token( TK_MinusMinus, start, p-start ); goto again; }
+	"->" { token( TK_Arrow, start, p-start ); goto again; }
+	"->*" { token( TK_ArrowStar, start, p-start ); goto again; }
+	".*" { token( TK_DotStar, start, p-start ); goto again; }
+	"..." { token( TK_DotDotDot, start, p-start ); goto again; }
+
+	"/*" { goto comment; }
+	"//" (ANY\"\n")* "\n" { goto again; }
+	[\001-\040\177]+ { goto again; }
+
+	[\041-\057\072-\100\133-\140\173-\176] {
+		token( *start, start, p-start );
+		goto again;
+	}
+	"\000" { return 0; }
+*/
+
+comment:
+	/* Comment text is never reported, so nothing before the cursor needs to
+	 * stay in the buffer. Moving start here lets a comment be longer than
+	 * buf. */
+	start = p;
+	marker = p;
+/*!re2c
+	"*/" { goto again; }
+	"\000" { return 0; }
+	ANY { goto comment; }
+*/
+}
diff --git a/examples/cppscan/cppscan.rl b/examples/cppscan/cppscan.rl
new file mode 100644
index 00000000..5c979ebe
--- /dev/null
+++ b/examples/cppscan/cppscan.rl
@@ -0,0 +1,207 @@
+/*
+ * A C++ scanner. Uses the longest match construction.
+ * << <= <<= >> >= >>= are left out since angle brackets are used in templates.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <iostream>
+
+#define TK_Dlit 256
+#define TK_Slit 257
+#define TK_Float 258
+#define TK_Id 259
+#define TK_NameSep 260
+#define TK_Arrow 261
+#define TK_PlusPlus 262
+#define TK_MinusMinus 263
+#define TK_ArrowStar 264
+#define TK_DotStar 265
+#define TK_ShiftLeft 266
+#define TK_ShiftRight 267
+#define TK_IntegerDecimal 268
+#define TK_IntegerOctal 269
+#define TK_IntegerHex 270
+#define TK_EqualsEquals 271
+#define TK_NotEquals 272
+#define TK_AndAnd 273
+#define TK_OrOr 274
+#define TK_MultAssign 275
+#define TK_DivAssign 276
+#define TK_PercentAssign 277
+#define TK_PlusAssign 278
+#define TK_MinusAssign 279
+#define TK_AmpAssign 280
+#define TK_CaretAssign 281
+#define TK_BarAssign 282
+#define TK_DotDotDot 283
+#define TK_Whitespace 284
+#define TK_Comment 285
+
+#define BUFSIZE 16384
+
+/* EOF char used to flush out that last token. This should be a whitespace
+ * token. */
+
+#define LAST_CHAR 0
+
+using std::cerr;
+using std::cout;
+using std::cin;
+using std::endl;
+
+static char buf[BUFSIZE];
+static int line = 1, col = 1;
+static char *tokstart, *tokend;
+static int act, have = 0;
+static int cs;
+
+%%{
+ machine Scanner;
+ write data nofinal;
+
+ # Floating literals.
+ fract_const = digit* '.' digit+ | digit+ '.';
+ exponent = [eE] [+\-]? digit+;
+ float_suffix = [flFL];
+
+ c_comment :=
+ any* :>> '*/'
+ @{ fgoto main; };
+
+ main := |*
+
+ # Single and double literals.
+ ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" )
+ {token( TK_Slit );};
+ ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' )
+ {token( TK_Dlit );};
+
+ # Identifiers
+ ( [a-zA-Z_] [a-zA-Z0-9_]* )
+ {token( TK_Id );};
+
+ # Floating literals.
+ ( fract_const exponent? float_suffix? | digit+ exponent float_suffix? )
+ {token( TK_Float );};
+
+ # Integer decimal. Leading part buffered by float.
+ ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} )
+ {token( TK_IntegerDecimal );};
+
+ # Integer octal. Leading part buffered by float.
+ ( '0' [0-9]+ [ulUL]{0,2} )
+ {token( TK_IntegerOctal );};
+
+ # Integer hex. Leading 0 buffered by float.
+ ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) )
+ {token( TK_IntegerHex );};
+
+ # Only buffer the second item, first buffered by symbol.
+ '::' {token( TK_NameSep );};
+ '==' {token( TK_EqualsEquals );};
+ '!=' {token( TK_NotEquals );};
+ '&&' {token( TK_AndAnd );};
+ '||' {token( TK_OrOr );};
+ '*=' {token( TK_MultAssign );};
+ '/=' {token( TK_DivAssign );};
+ '%=' {token( TK_PercentAssign );};
+ '+=' {token( TK_PlusAssign );};
+ '-=' {token( TK_MinusAssign );};
+ '&=' {token( TK_AmpAssign );};
+ '^=' {token( TK_CaretAssign );};
+ '|=' {token( TK_BarAssign );};
+ '++' {token( TK_PlusPlus );};
+ '--' {token( TK_MinusMinus );};
+ '->' {token( TK_Arrow );};
+ '->*' {token( TK_ArrowStar );};
+ '.*' {token( TK_DotStar );};
+
+ # Three char compounds, first item already buffered.
+ '...' {token( TK_DotDotDot );};
+
+ # Single char symbols.
+ ( punct - [_"'] ) {token( tokstart[0] );};
+
+ # Comments and whitespace.
+ '/*' { fgoto c_comment; };
+ '//' [^\n]* '\n';
+ ( any - 33..126 )+;
+
+ *|;
+}%%
+
+/*
+ * Print "<tok> " followed by the text between tokstart and tokend and a
+ * newline, then advance the global line and col counters over that text.
+ */
+void token( int tok )
+{
+	char *data = tokstart;
+	int len = tokend - tokstart;
+	const char *end = data + len;
+
+	cout << '<' << tok << "> ";
+	cout.write( data, len );
+	cout << '\n';
+
+	/* Count newlines and columns. This pass is kept apart from the output
+	 * above so that it still gives the routine some work when the output
+	 * is commented out during performance testing. */
+	for ( const char *cur = data; cur < end; ++cur ) {
+		if ( *cur != '\n' ) {
+			col += 1;
+			continue;
+		}
+		line += 1;
+		col = 1;
+	}
+}
+
+/*
+ * Scan stdin with the Scanner machine.
+ *
+ * Input is read into the file-level buf. A read that returns no bytes causes
+ * LAST_CHAR to be appended and ends the loop after that pass. If the machine
+ * lands in Scanner_error, "PARSE ERROR" is written to cerr and the program
+ * exits with status 1. If tokstart is non-zero after a pass, the bytes from
+ * tokstart onward are moved to the front of buf and the next read is placed
+ * after them.
+ */
+int main()
+{
+ std::ios::sync_with_stdio(false);
+
+ %% write init;
+
+ /* Read and scan one chunk per iteration until end of input. */
+ bool done = false;
+ while ( !done ) {
+ /* New data goes in after the have bytes kept from the last pass. */
+ char *p = buf + have;
+ int space = BUFSIZE - have;
+
+ if ( space == 0 ) {
+ /* We filled up the buffer trying to scan a token. */
+ cerr << "OUT OF BUFFER SPACE" << endl;
+ exit(1);
+ }
+
+ cin.read( p, space );
+ int len = cin.gcount();
+
+ /* If we see eof then append the EOF char. */
+ if ( len == 0 ) {
+ p[0] = LAST_CHAR, len++;
+ done = true;
+ }
+
+ char *pe = p + len;
+ %% write exec;
+
+ /* Check if we failed. */
+ if ( cs == Scanner_error ) {
+ /* Machine failed before finding a token. */
+ cerr << "PARSE ERROR" << endl;
+ exit(1);
+ }
+
+ /* Now set up the prefix. */
+ if ( tokstart == 0 )
+ have = 0;
+ else {
+ /* There is data that needs to be shifted over. */
+ have = pe - tokstart;
+ memmove( buf, tokstart, have );
+ /* Re-base both token pointers onto the moved data. */
+ tokend -= (tokstart-buf);
+ tokstart = buf;
+ }
+ }
+
+ return 0;
+}
diff --git a/examples/format/Makefile b/examples/format/Makefile
new file mode 100644
index 00000000..d5ac829b
--- /dev/null
+++ b/examples/format/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: format
+
+ps: format.ps
+
+format: format.o
+ gcc -g -o format format.o
+
+format.c: format.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) format.rl | $(RLCODEGEN) -G2 -o format.c
+
+format.ps: format.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) format.rl | $(RLCODEGEN) -V | dot -Tps > format.ps
+
+%.o: %.c
+ gcc -Wall -O3 -g -c -o $@ $<
+
+distclean clean:
+ rm -Rf *.o format.c format format.ps
diff --git a/examples/format/format.rl b/examples/format/format.rl
new file mode 100644
index 00000000..ea5fdfb5
--- /dev/null
+++ b/examples/format/format.rl
@@ -0,0 +1,191 @@
+/*
+ * Partial printf implementation.
+ */
+
+#define BUFLEN 1024
+#include <stdio.h>
+
+typedef void (*WriteFunc)( char *data, int len );
+
+struct format
+{
+ char buf[BUFLEN+1];
+ int buflen;
+ WriteFunc write;
+
+ int flags;
+ int width;
+ int prec;
+ int cs;
+};
+
+void do_conv( struct format *fsm, char c )
+{
+    /* Report the parsed conversion spec; a blank line separates reports. */
+    printf( "flags: %x\n" "width: %i\n"
+            "prec: %i\n" "conv: %c\n"
+            "\n",
+            fsm->flags, fsm->width, fsm->prec, c );
+}
+
+#define FL_HASH 0x01
+#define FL_ZERO 0x02
+#define FL_DASH 0x04
+#define FL_SPACE 0x08
+#define FL_PLUS 0x10
+
+#define FL_HAS_WIDTH 0x0100
+#define FL_WIDTH_ARG 0x0200
+#define FL_HAS_PREC 0x0400
+#define FL_PREC_ARG 0x0800
+
+#define FL_LEN_H 0x010000
+#define FL_LEN_HH 0x020000
+#define FL_LEN_L 0x040000
+#define FL_LEN_LL 0x080000
+
+%%{
+ machine format;
+ access fsm->;
+
+ action clear {
+ fsm->flags = 0;
+ fsm->width = 0;
+ fsm->prec = 0;
+ }
+
+ # A non-zero number.
+ nznum = [1-9] [0-9]*;
+
+ # Width
+ action width_num { fsm->width = 10 * fsm->width + (fc-'0'); }
+ action width_arg { fsm->flags |= FL_WIDTH_ARG; }
+ action width { fsm->flags |= FL_HAS_WIDTH; }
+ width = ( ( nznum $width_num | '*' @width_arg ) %width )?;
+
+ # Precision
+ action prec_num { fsm->prec = 10 * fsm->prec + (fc-'0'); }
+ action prec_arg { fsm->flags |= FL_PREC_ARG; }
+ action prec { fsm->flags |= FL_HAS_PREC; }
+ precision = ( '.' ( digit* $prec_num %prec | '*' @prec_arg ) )?;
+
+ # Flags
+ action flags_hash { fsm->flags |= FL_HASH; }
+ action flags_zero { fsm->flags |= FL_ZERO; }
+ action flags_dash { fsm->flags |= FL_DASH; }
+ action flags_space { fsm->flags |= FL_SPACE; }
+ action flags_plus { fsm->flags |= FL_PLUS; }
+
+ flags = (
+ '#' @flags_hash |
+ '0' @flags_zero |
+ '-' @flags_dash |
+ ' ' @flags_space |
+ '+' @flags_plus )*;
+
+ action length_h { fsm->flags |= FL_LEN_H; }
+ action length_l { fsm->flags |= FL_LEN_L; }
+ action length_hh { fsm->flags |= FL_LEN_HH; }
+ action length_ll { fsm->flags |= FL_LEN_LL; }
+
+ # Must use leaving transitions on 'h' and 'l' because they are
+ # prefixes for 'hh' and 'll'.
+ length = (
+ 'h' %length_h |
+ 'l' %length_l |
+ 'hh' @length_hh |
+ 'll' @length_ll )?;
+
+ action conversion {
+ do_conv( fsm, fc );
+ }
+
+ conversion = [diouxXcsp] @conversion;
+
+ fmt_spec =
+ '%' @clear
+ flags
+ width
+ precision
+ length
+ conversion;
+
+ action emit {
+ if ( fsm->buflen == BUFLEN ) {
+ fsm->write( fsm->buf, fsm->buflen );
+ fsm->buflen = 0;
+ }
+ fsm->buf[fsm->buflen++] = fc;
+ }
+
+ action finish_ok {
+ if ( fsm->buflen > 0 )
+ fsm->write( fsm->buf, fsm->buflen );
+ }
+ action finish_err {
+ printf("EOF IN FORMAT\n");
+ }
+ action err_char {
+ printf("ERROR ON CHAR: 0x%x\n", fc );
+ }
+
+ main := (
+ [^%] @emit |
+ '%%' @emit |
+ fmt_spec
+ )* @/finish_err %/finish_ok $!err_char;
+}%%
+
+%% write data;
+
+void format_init( struct format *fsm )
+{
+ fsm->buflen = 0;
+ %% write init;
+}
+
+void format_execute( struct format *fsm, const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+}
+
+int format_finish( struct format *fsm )
+{
+ %% write eof;
+
+ if ( fsm->cs == format_error )
+ return -1;
+ if ( fsm->cs >= format_first_final )
+ return 1;
+ return 0;
+}
+
+
+#define INPUT_BUFSIZE 2048
+
+struct format fsm;
+char buf[INPUT_BUFSIZE];
+
+void write(char *data, int len )
+{
+ fwrite( data, 1, len, stdout );
+}
+
+int main()
+{
+ fsm.write = write;
+ format_init( &fsm );
+ while ( 1 ) {
+ int len = fread( buf, 1, INPUT_BUFSIZE, stdin );
+ format_execute( &fsm, buf, len );
+ if ( len != INPUT_BUFSIZE )
+ break;
+ }
+ if ( format_finish( &fsm ) <= 0 )
+ printf("FAIL\n");
+ return 0;
+}
+
diff --git a/examples/gotocallret/Makefile b/examples/gotocallret/Makefile
new file mode 100644
index 00000000..13f9818d
--- /dev/null
+++ b/examples/gotocallret/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: gotocallret
+
+ps: gotocallret.ps
+
+gotocallret: gotocallret.o
+ g++ -g -o gotocallret gotocallret.o
+
+gotocallret.cpp: gotocallret.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) gotocallret.rl | $(RLCODEGEN) -G2 -o gotocallret.cpp
+
+gotocallret.o: gotocallret.cpp
+ g++ -Wall -g -c -O3 -o $@ $<
+
+gotocallret.ps: gotocallret.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) gotocallret.rl | $(RLCODEGEN) -V | dot -Tps > gotocallret.ps
+
+distclean clean:
+ rm -Rf *.o gotocallret.cpp gotocallret gotocallret.ps
diff --git a/examples/gotocallret/gotocallret.rl b/examples/gotocallret/gotocallret.rl
new file mode 100644
index 00000000..84384a9c
--- /dev/null
+++ b/examples/gotocallret/gotocallret.rl
@@ -0,0 +1,103 @@
+/*
+ * Demonstrate the use of goto, call and return. This machine expects either a
+ * lower case char or a digit as a command then a space followed by the command
+ * arg. If the command is a char, then the arg must be a string of chars.
+ * If the command is a digit, then the arg must be a string of digits. This
+ * choice is determined by action code, rather than through transition
+ * destinations.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace std;
+
+struct GotoCallRet
+{
+ char comm;
+ int cs, top, stack[32];
+
+ int init( );
+ int execute( const char *data, int len );
+ int finish( );
+};
+
+%%{
+ machine GotoCallRet;
+
+ # Error machine, consumes to end of
+ # line, then starts the main line over.
+ garble_line := (
+ (any-'\n')*'\n'
+ ) >{cout << "error: garbling line" << endl;} @{fgoto main;};
+
+ # Look for a string of alphas or of digits,
+ # on anything else, hold the character and return.
+ alp_comm := alpha+ $!{fhold;fret;};
+ dig_comm := digit+ $!{fhold;fret;};
+
+ # Choose which machine to call into based on the command.
+ action comm_arg {
+ if ( comm >= 'a' )
+ fcall alp_comm;
+ else
+ fcall dig_comm;
+ }
+
+ # Specifies command string. Note that the arg is left out.
+ command = (
+ [a-z0-9] @{comm = fc;} ' ' @comm_arg '\n'
+ ) @{cout << "correct command" << endl;};
+
+ # Any number of commands. If there is an
+ # error anywhere, garble the line.
+ main := command* $!{fhold;fgoto garble_line;};
+}%%
+
+%% write data;
+
+int GotoCallRet::init( )
+{
+ %% write init;
+ return 1;
+}
+
+int GotoCallRet::execute( const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+ if ( cs == GotoCallRet_error )
+ return -1;
+ if ( cs >= GotoCallRet_first_final )
+ return 1;
+ return 0;
+}
+
+int GotoCallRet::finish( )
+{
+ %% write eof;
+ if ( cs == GotoCallRet_error )
+ return -1;
+ if ( cs >= GotoCallRet_first_final )
+ return 1;
+ return 0;
+}
+
+#define BUFSIZE 1024
+
+int main()
+{
+ char buf[BUFSIZE];
+
+ GotoCallRet gcr;
+ gcr.init();
+ while ( fgets( buf, sizeof(buf), stdin ) != 0 ) {
+ gcr.execute( buf, strlen(buf) );
+ }
+ if ( gcr.finish() <= 0 )
+ cerr << "gotocallret: error: parsing input" << endl;
+ return 0;
+}
diff --git a/examples/mailbox/Makefile b/examples/mailbox/Makefile
new file mode 100644
index 00000000..94d66800
--- /dev/null
+++ b/examples/mailbox/Makefile
@@ -0,0 +1,16 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: mailbox
+
+mailbox: mailbox.o
+ g++ -g -o mailbox mailbox.o
+
+mailbox.cpp: mailbox.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) mailbox.rl | $(RLCODEGEN) -G2 -o mailbox.cpp
+
+%.o: %.cpp
+ g++ -Wall -g -c -O3 -o $@ $<
+
+distclean clean:
+ rm -Rf *.o mailbox.cpp mailbox mailbox.ps
diff --git a/examples/mailbox/mailbox.rl b/examples/mailbox/mailbox.rl
new file mode 100644
index 00000000..74e33108
--- /dev/null
+++ b/examples/mailbox/mailbox.rl
@@ -0,0 +1,206 @@
+/*
+ * Parses unix mail boxes into headers and bodies.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace std;
+
+#define BUFSIZE 2048
+
+/* A growable buffer for collecting headers. */
+struct Buffer
+{
+ Buffer() : data(0), allocated(0), length(0) { }
+ ~Buffer() { empty(); }
+
+ void append( char p ) {
+ if ( ++length > allocated )
+ upAllocate( length*2 );
+ data[length-1] = p;
+ }
+
+ void clear() { length = 0; }
+ void upAllocate( int len );
+ void empty();
+
+ char *data;
+ int allocated;
+ int length;
+};
+
+
+struct MailboxScanner
+{
+ Buffer headName;
+ Buffer headContent;
+
+ int cs, top, stack[1];
+
+ int init( );
+ int execute( const char *data, int len );
+ int finish( );
+};
+
+%%{
+ machine MailboxScanner;
+
+ # Buffer the header names.
+ action bufHeadName { headName.append(fc); }
+
+ # Prints a blank line after the end of the headers of each message.
+ action blankLine { cout << endl; }
+
+ # Helpers we will use in matching the date section of the from line.
+ day = /[A-Z][a-z][a-z]/;
+ month = /[A-Z][a-z][a-z]/;
+ year = /[0-9][0-9][0-9][0-9]/;
+ time = /[0-9][0-9]:[0-9][0-9]/ . ( /:[0-9][0-9]/ | '' );
+ letterZone = /[A-Z][A-Z][A-Z]/;
+ numZone = /[+\-][0-9][0-9][0-9][0-9]/;
+ zone = letterZone | numZone;
+ dayNum = /[0-9 ][0-9]/;
+
+ # These are the different formats of the date minus an obscure
+ # type that has a funny string 'remote from xxx' on the end. Taken
+ # from c-client in the imap-2000 distribution.
+ date = day . ' ' . month . ' ' . dayNum . ' ' . time . ' ' .
+ ( year | year . ' ' . zone | zone . ' ' . year );
+
+ # From lines separate messages. We will exclude fromLine from a message
+ # body line. This will cause us to stay in message line up until an
+ # entirely correct from line is matched.
+ fromLine = 'From ' . (any-'\n')* . ' ' . date . '\n';
+
+ # The types of characters that can be used as a header name.
+ hchar = print - [ :];
+
+ # Simply eat up an uninteresting header. Return at the first non-ws
+ # character following a newline.
+ consumeHeader := (
+ [^\n] |
+ '\n' [ \t] |
+ '\n' [^ \t] @{fhold; fret;}
+ )*;
+
+ action hchar {headContent.append(fc);}
+ action hspace {headContent.append(' ');}
+
+ action hfinish {
+ headContent.append(0);
+ cout << headContent.data << endl;
+ headContent.clear();
+ fhold;
+ fret;
+ }
+
+ # Display the contents of a header as it is consumed. Collapses line
+ # continuations to a single space.
+ printHeader := (
+ [^\n] @hchar |
+ ( '\n' ( [ \t]+ '\n' )* [ \t]+ ) %hspace
+ )** $!hfinish;
+
+ action onHeader
+ {
+ headName.append(0);
+ if ( strcmp( headName.data, "From" ) == 0 ||
+ strcmp( headName.data, "To" ) == 0 ||
+ strcmp( headName.data, "Subject" ) == 0 )
+ {
+ /* Print the header name, then jump to a machine that will display
+ * the contents. */
+ cout << headName.data << ":";
+ headName.clear();
+ fcall printHeader;
+ }
+
+ headName.clear();
+ fcall consumeHeader;
+ }
+
+ header = hchar+ $bufHeadName ':' @onHeader;
+
+ # Exclude fromLine from a messageLine, otherwise when encountering a
+ # fromLine we will be simultaneously matching the old message and a new
+ # message.
+ messageLine = ( [^\n]* '\n' - fromLine );
+
+ # An entire message.
+ message = ( fromLine . header* . '\n' @blankLine . messageLine* );
+
+ # File is a series of messages.
+ main := message*;
+}%%
+
+%% write data;
+
+int MailboxScanner::init( )
+{
+ %% write init;
+ return 1;
+}
+
+int MailboxScanner::execute( const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+
+ if ( cs == MailboxScanner_error )
+ return -1;
+ if ( cs >= MailboxScanner_first_final )
+ return 1;
+ return 0;
+}
+
+int MailboxScanner::finish( )
+{
+ %% write eof;
+ if ( cs == MailboxScanner_error )
+ return -1;
+ if ( cs >= MailboxScanner_first_final )
+ return 1;
+ return 0;
+}
+
+
+void Buffer::empty()
+{
+    /* Nothing was ever allocated, so there is nothing to release or reset. */
+    if ( data == 0 )
+        return;
+
+    free( data );
+    data = 0;
+    length = allocated = 0;
+}
+
+/* Resize the storage to len bytes, keeping the contents; exits on failure. */
+void Buffer::upAllocate( int len )
+{
+    /* realloc on a null pointer acts like malloc, so one call covers both. */
+    char *grown = (char*) realloc( data, len );
+    if ( grown == 0 ) { cerr << "mailbox: out of memory" << endl; exit(1); }
+    data = grown, allocated = len;
+}
+
+MailboxScanner mailbox;
+char buf[BUFSIZE];
+
+int main()
+{
+ mailbox.init();
+ while ( 1 ) {
+ int len = fread( buf, 1, BUFSIZE, stdin );
+ mailbox.execute( buf, len );
+ if ( len != BUFSIZE )
+ break;
+ }
+ if ( mailbox.finish() <= 0 )
+ cerr << "mailbox: error parsing input" << endl;
+ return 0;
+}
diff --git a/examples/params/Makefile b/examples/params/Makefile
new file mode 100644
index 00000000..98b950ca
--- /dev/null
+++ b/examples/params/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: params
+
+ps: params.ps
+
+params: params.o
+ gcc -g -o params params.o
+
+params.c: params.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) params.rl | $(RLCODEGEN) -G2 -o params.c
+
+params.ps: params.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) params.rl | $(RLCODEGEN) -V | dot -Tps > params.ps
+
+%.o: %.c
+ gcc -Wall -O3 -g -c -o $@ $<
+
+distclean clean:
+ rm -Rf *.o params.c params params.ps
diff --git a/examples/params/params.rl b/examples/params/params.rl
new file mode 100644
index 00000000..3cf908ff
--- /dev/null
+++ b/examples/params/params.rl
@@ -0,0 +1,104 @@
+/*
+ * Parse command line arguments.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#define BUFLEN 1024
+
+struct params
+{
+ char buffer[BUFLEN+1];
+ int buflen;
+ int cs;
+};
+
+%%{
+ machine params;
+ access fsm->;
+
+ # A buffer to collect arguments
+
+ # Append to the buffer.
+ action append {
+ if ( fsm->buflen < BUFLEN )
+ fsm->buffer[fsm->buflen++] = fc;
+ }
+
+ # Terminate the buffer. Always fits: append stops at BUFLEN, buffer has BUFLEN+1.
+ action term {
+     /* Unconditional, so an argument of BUFLEN or more chars is still terminated. */
+     fsm->buffer[fsm->buflen] = 0;
+ }
+
+ # Clear out the buffer
+ action clear { fsm->buflen = 0; }
+
+ action help { printf("help\n"); }
+ action version { printf("version\n"); }
+ action output { printf("output: \"%s\"\n", fsm->buffer); }
+ action spec { printf("spec: \"%s\"\n", fsm->buffer); }
+ action mach { printf("machine: \"%s\"\n", fsm->buffer); }
+
+ # Helpers that collect strings
+ string = [^\0]+ >clear $append %term;
+
+ # Different arguments.
+ help = ( '-h' | '-H' | '-?' | '--help' ) 0 @help;
+ version = ( '-v' | '--version' ) 0 @version;
+ output = '-o' 0? string 0 @output;
+ spec = '-S' 0? string 0 @spec;
+ mach = '-M' 0? string 0 @mach;
+
+ main := (
+ help |
+ version |
+ output |
+ spec |
+ mach
+ )*;
+}%%
+
+%% write data;
+
+void params_init( struct params *fsm )
+{
+ fsm->buflen = 0;
+ %% write init;
+}
+
+void params_execute( struct params *fsm, const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+}
+
+int params_finish( struct params *fsm )
+{
+ %% write eof;
+
+ if ( fsm->cs == params_error )
+ return -1;
+ if ( fsm->cs >= params_first_final )
+ return 1;
+ return 0;
+}
+
+#define BUFSIZE 2048
+
+int main( int argc, char **argv )
+{
+ int a;
+ struct params params;
+
+ params_init( &params );
+ for ( a = 1; a < argc; a++ )
+ params_execute( &params, argv[a], strlen(argv[a])+1 );
+ if ( params_finish( &params ) != 1 )
+ fprintf( stderr, "params: error processing arguments\n" );
+
+ return 0;
+}
diff --git a/examples/pullscan/Makefile b/examples/pullscan/Makefile
new file mode 100644
index 00000000..1a048ea1
--- /dev/null
+++ b/examples/pullscan/Makefile
@@ -0,0 +1,23 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+CFLAGS = -Wall -g -O3
+
+all: pullscan
+
+ps: pullscan.ps
+
+pullscan: pullscan.o
+ g++ -g -o $@ $<
+
+pullscan.c: pullscan.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) pullscan.rl | $(RLCODEGEN) -G2 -o $@
+
+%.o: %.c
+ gcc $(CFLAGS) -c -o $@ $<
+
+pullscan.ps: pullscan.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) pullscan.rl | $(RLCODEGEN) -V | dot -Tps > pullscan.ps
+
+distclean clean:
+ rm -Rf *.o pullscan.c pullscan pullscan.ps
diff --git a/examples/pullscan/pullscan.rl b/examples/pullscan/pullscan.rl
new file mode 100644
index 00000000..79e3c499
--- /dev/null
+++ b/examples/pullscan/pullscan.rl
@@ -0,0 +1,166 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define BUFSIZE 4096
+
+typedef struct _Scanner {
+ /* Scanner state. */
+ int cs;
+ int act;
+ int have;
+ int curline;
+ char *tokstart;
+ char *tokend;
+ char *p;
+ char *pe;
+ FILE *file;
+ int done;
+
+ /* Token data */
+ char *data;
+ int len;
+ int value;
+
+ char buf[BUFSIZE];
+} Scanner;
+
+
+void scan_init( Scanner *s, FILE *file )
+{
+ memset (s, '\0', sizeof(Scanner));
+ s->curline = 1;
+ s->file = file;
+}
+
+#define TK_NO_TOKEN (-1)
+#define TK_ERR 128
+#define TK_EOF 129
+#define TK_Identifier 130
+#define TK_Number 131
+
+
+%%{
+ machine Scanner;
+ write data;
+}%%
+
+#define ret_tok( _tok ) token = _tok; s->data = s->tokstart
+
+int scan( Scanner *s )
+{
+ char *p = s->p;
+ char *pe = s->pe;
+ int token = TK_NO_TOKEN;
+ int space, readlen;
+
+ while ( 1 ) {
+ if ( p == pe ) {
+ printf("scanner: need more data\n");
+
+ if ( s->tokstart == 0 )
+ s->have = 0;
+ else {
+ /* There is data that needs to be shifted over. */
+ printf("scanner: buffer broken mid token\n");
+ s->have = pe - s->tokstart;
+ memmove( s->buf, s->tokstart, s->have );
+ s->tokend -= (s->tokstart-s->buf);
+ s->tokstart = s->buf;
+ }
+
+ p = s->buf + s->have;
+ space = BUFSIZE - s->have;
+
+ if ( space == 0 ) {
+ /* We filled up the buffer trying to scan a token. */
+ printf("scanner: out of buffer space\n");
+ return TK_ERR;
+ }
+
+ if ( s->done ) {
+ printf("scanner: end of file\n");
+ p[0] = 0;
+ readlen = 1;
+ }
+ else {
+ readlen = fread( p, 1, space, s->file );
+ if ( readlen < space )
+ s->done = 1;
+ }
+
+ pe = p + readlen;
+ }
+
+ %%{
+ machine Scanner;
+ access s->;
+
+ main := |*
+
+ # Identifiers
+ ( [a-zA-Z_] [a-zA-Z0-9_]* ) =>
+ { ret_tok( TK_Identifier ); fbreak; };
+
+ # Whitespace
+ [ \t\n];
+
+ # Number
+ digit+ =>
+ { ret_tok( TK_Number ); fbreak; };
+
+ # EOF
+ 0 =>
+ { ret_tok( TK_EOF ); fbreak; };
+
+ # Anything else
+ any =>
+ { ret_tok( *p ); fbreak; };
+
+ *|;
+
+ write exec;
+ }%%
+
+ if ( s->cs == Scanner_error )
+ return TK_ERR;
+
+ if ( token != TK_NO_TOKEN ) {
+ /* Save p and pe. fbreak does not advance p. */
+ s->p = p + 1;
+ s->pe = pe;
+ s->len = s->p - s->data;
+ return token;
+ }
+ }
+}
+
+
+int main (int argc, char** argv)
+{
+ Scanner ss;
+ int tok;
+
+ scan_init(&ss, stdin);
+
+ while ( 1 ) {
+ tok = scan (&ss);
+ if ( tok == TK_EOF ) {
+ printf ("parser: EOF\n");
+ break;
+ }
+ else if ( tok == TK_ERR ) {
+ printf ("parser: ERR\n");
+ break;
+ }
+ else {
+ printf ("parser: %d \"", tok);
+ fwrite ( ss.data, 1, ss.len, stdout );
+ printf ("\"\n" );
+ }
+ }
+
+ return 0;
+}
+
+
diff --git a/examples/rlscan/Makefile b/examples/rlscan/Makefile
new file mode 100644
index 00000000..2021d27c
--- /dev/null
+++ b/examples/rlscan/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: rlscan
+
+ps: rlscan.ps
+
+rlscan: rlscan.o
+ g++ -g -o rlscan rlscan.o
+
+rlscan.cpp: rlscan.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) rlscan.rl | $(RLCODEGEN) -G2 -o rlscan.cpp
+
+%.o: %.cpp
+ g++ -Wall -g -c -O3 -o $@ $<
+
+rlscan.ps: rlscan.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) rlscan.rl | $(RLCODEGEN) -V | dot -Tps > rlscan.ps
+
+distclean clean:
+ rm -Rf *.o rlscan.cpp rlscan rlscan.ps
diff --git a/examples/rlscan/rlscan.rl b/examples/rlscan/rlscan.rl
new file mode 100644
index 00000000..f912b8d8
--- /dev/null
+++ b/examples/rlscan/rlscan.rl
@@ -0,0 +1,298 @@
+/*
+ * Lexes Ragel input files.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace std;
+
+void escapeXML( char *data )
+{
+    /* Walk the NUL-terminated string one character at a time, writing the
+     * entity for each of <, > and & and every other character unchanged. */
+    for ( char *cur = data; *cur != 0; cur++ ) {
+        const char ch = *cur;
+        if ( ch == '<' ) cout << "&lt;";
+        else if ( ch == '>' ) cout << "&gt;";
+        else if ( ch == '&' ) cout << "&amp;";
+        else cout << ch;
+    }
+}
+
+void escapeXML( char c )
+{
+    /* Look up the entity for an XML special; null means emit c unchanged. */
+    const char *entity = c == '<' ? "&lt;" :
+            c == '>' ? "&gt;" :
+            c == '&' ? "&amp;" : 0;
+    if ( entity != 0 ) cout << entity;
+    else cout << c;
+}
+
+void escapeXML( char *data, int len )
+{
+    /* Escape exactly len characters; no terminator is looked for. */
+    char *const stop = data + len;
+    while ( data != stop ) {
+        const char ch = *data++;
+        const char *entity = ch == '<' ? "&lt;" : ch == '>' ? "&gt;" :
+                ch == '&' ? "&amp;" : 0;
+        if ( entity != 0 ) cout << entity; else cout << ch;
+    }
+}
+
+inline void write( const char *data ) /* const so string literals are accepted */
+{
+    cout << data;   /* Raw output of a NUL-terminated string, no XML escaping. */
+}
+
+inline void write( char c )
+{
+ cout << c;
+}
+
+inline void write( char *data, int len )
+{
+ cout.write( data, len );
+}
+
+
+%%{
+ machine RagelScan;
+
+ word = [a-zA-Z_][a-zA-Z_0-9]*;
+ integer = [0-9]+;
+ hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*;
+
+ default = ^0;
+ EOF = 0;
+
+ # Handles comments in outside code and inline blocks.
+ c_comment :=
+ ( default* :>> '*/' )
+ ${ escapeXML( fc ); }
+ @{ fret; };
+
+ action emit {
+ escapeXML( tokstart, tokend-tokstart );
+ }
+
+ #
+ # Inline action code
+ #
+
+ ilscan := |*
+
+ "'" ( [^'\\] | /\\./ )* "'" => emit;
+ '"' ( [^"\\] | /\\./ )* '"' => emit;
+ '/*' {
+ write( "/*" );
+ fcall c_comment;
+ };
+ '//' [^\n]* '\n' => emit;
+
+ '{' {
+ write( '{' );
+ inline_depth += 1;
+ };
+
+ '}' {
+ write( '}' );
+ /* If dropping down to the last } then return
+ * to ragel code. */
+ if ( --inline_depth == 0 ) {
+ write( "</inline>\n" );
+ fgoto rlscan;
+ }
+ };
+
+ default => { escapeXML( *tokstart ); };
+ *|;
+
+ #
+ # Ragel Tokens
+ #
+
+ rlscan := |*
+ '}%%' {
+ if ( !single_line ) {
+ write( "</section>\n" );
+ fgoto main;
+ }
+ };
+
+ '\n' {
+ if ( single_line ) {
+ write( "</section>\n" );
+ fgoto main;
+ }
+ };
+
+ # Word
+ word {
+ write( "<word>" );
+ write( tokstart, tokend-tokstart );
+ write( "</word>\n" );
+ };
+
+ # Decimal integer.
+ integer {
+ write( "<int>" );
+ write( tokstart, tokend-tokstart );
+ write( "</int>\n" );
+ };
+
+ # Hexadecimal integer.
+ hex {
+ write( "<hex>" );
+ write( tokstart, tokend-tokstart );
+ write( "</hex>\n" );
+ };
+
+ # Consume comments.
+ '#' [^\n]* '\n';
+
+ # Single literal string.
+ "'" ( [^'\\] | /\\./ )* "'" {
+ write( "<single_lit>" );
+ escapeXML( tokstart, tokend-tokstart );
+ write( "</single_lit>\n" );
+ };
+
+ # Double literal string.
+ '"' ( [^"\\] | /\\./ )* '"' {
+ write( "<double_lit>" );
+ escapeXML( tokstart, tokend-tokstart );
+ write( "</double_lit>\n" );
+ };
+
+ # Or literal.
+ '[' ( [^\]\\] | /\\./ )* ']' {
+ write( "<or_lit>" );
+ escapeXML( tokstart, tokend-tokstart );
+ write( "</or_lit>\n" );
+ };
+
+ # Regex Literal.
+ '/' ( [^/\\] | /\\./ ) * '/' {
+ write( "<re_lit>" );
+ escapeXML( tokstart, tokend-tokstart );
+ write( "</re_lit>\n" );
+ };
+
+ # Open an inline block
+ '{' {
+ inline_depth = 1;
+ write( "<inline>{" );
+ fgoto ilscan;
+ };
+
+ punct {
+ write( "<symbol>" );
+ escapeXML( fc );
+ write( "</symbol>\n" );
+ };
+
+ default;
+ *|;
+
+ #
+ # Outside code.
+ #
+
+ main := |*
+
+ "'" ( [^'\\] | /\\./ )* "'" => emit;
+ '"' ( [^"\\] | /\\./ )* '"' => emit;
+
+ '/*' {
+ escapeXML( tokstart, tokend-tokstart );
+ fcall c_comment;
+ };
+
+ '//' [^\n]* '\n' => emit;
+
+ '%%{' {
+ write( "<section>\n" );
+ single_line = false;
+ fgoto rlscan;
+ };
+
+ '%%' {
+ write( "<section>\n" );
+ single_line = true;
+ fgoto rlscan;
+ };
+
+ default {
+ escapeXML( *tokstart );
+ };
+
+ # EOF.
+ EOF;
+ *|;
+}%%
+
+%% write data nofinal;
+
+#define BUFSIZE 2048
+
+int main()
+{
+ std::ios::sync_with_stdio(false);
+
+ int cs, act;
+ char *tokstart, *tokend;
+ int stack[1], top;
+
+ static char inbuf[BUFSIZE];
+ bool single_line = false;
+ int inline_depth = 0;
+
+ %% write init;
+
+ bool done = false;
+ int have = 0;
+ while ( !done ) {
+ /* How much space is in the buffer? */
+ int space = BUFSIZE - have;
+ if ( space == 0 ) {
+ /* Buffer is full. */
+ cerr << "TOKEN TOO BIG" << endl;
+ exit(1);
+ }
+
+ /* Read in a block. */
+ char *p = inbuf + have;
+ cin.read( p, space );
+ int len = cin.gcount();
+
+ /* Check for EOF. */
+ if ( len == 0 ) {
+ p[0] = 0, len++;
+ done = true;
+ }
+
+ char *pe = p + len;
+ %% write exec;
+
+ if ( cs == RagelScan_error ) {
+ /* Machine failed before finding a token. */
+ cerr << "PARSE ERROR" << endl;
+ exit(1);
+ }
+
+ if ( tokstart == 0 )
+ have = 0;
+ else {
+ /* There is a prefix to preserve, shift it over. */
+ have = pe - tokstart;
+ memmove( inbuf, tokstart, have );
+ tokend = inbuf + (tokend-tokstart);
+ tokstart = inbuf;
+ }
+ }
+ return 0;
+}
diff --git a/examples/statechart/Makefile b/examples/statechart/Makefile
new file mode 100644
index 00000000..3dec7fd3
--- /dev/null
+++ b/examples/statechart/Makefile
@@ -0,0 +1,21 @@
+RAGEL = ../../ragel/ragel
+RLCODEGEN = ../../rlcodegen/rlcodegen
+
+all: statechart
+
+ps: statechart.ps
+
+statechart: statechart.o
+ g++ -g -o statechart statechart.o
+
+statechart.cpp: statechart.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) statechart.rl | $(RLCODEGEN) -G2 -o statechart.cpp
+
+statechart.o: statechart.cpp
+ g++ -Wall -g -c -O3 -o $@ $<
+
+statechart.ps: statechart.rl $(RAGEL) $(RLCODEGEN)
+ $(RAGEL) statechart.rl | $(RLCODEGEN) -V | dot -Tps > statechart.ps
+
+distclean clean:
+ rm -Rf *.o statechart.cpp statechart statechart.ps
diff --git a/examples/statechart/statechart.rl b/examples/statechart/statechart.rl
new file mode 100644
index 00000000..cb99a203
--- /dev/null
+++ b/examples/statechart/statechart.rl
@@ -0,0 +1,114 @@
+/*
+ * Demonstrate the use of labels, the epsilon operator, and the join operator
+ * for creating machines using the named state and transition list paradigm.
+ * This implements the same machine as the atoi example.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace std;
+
+struct StateChart
+{
+ bool neg;
+ int val;
+ int cs;
+
+ int init( );
+ int execute( const char *data, int len );
+ int finish( );
+};
+
+%%{
+ machine StateChart;
+
+ action begin {
+ neg = false;
+ val = 0;
+ }
+
+ action see_neg {
+ neg = true;
+ }
+
+ action add_digit {
+ val = val * 10 + (fc - '0');
+ }
+
+ action finish {
+ if ( neg )
+ val = -1 * val;
+ }
+
+ atoi = (
+ start: (
+ '-' @see_neg ->om_num |
+ '+' ->om_num |
+ [0-9] @add_digit ->more_nums
+ ),
+
+ # One or more nums.
+ om_num: (
+ [0-9] @add_digit ->more_nums
+ ),
+
+ # Zero or more nums.
+ more_nums: (
+ [0-9] @add_digit ->more_nums |
+ '' -> final
+ )
+ ) >begin %finish;
+
+ main := ( atoi '\n' @{ cout << val << endl; } )*;
+}%%
+
+%% write data;
+
+int StateChart::init( )
+{
+ %% write init;
+ return 1;
+}
+
+int StateChart::execute( const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+
+ if ( cs == StateChart_error )
+ return -1;
+ if ( cs >= StateChart_first_final )
+ return 1;
+ return 0;
+}
+
+int StateChart::finish( )
+{
+ %% write eof;
+ if ( cs == StateChart_error )
+ return -1;
+ if ( cs >= StateChart_first_final )
+ return 1;
+ return 0;
+}
+
+
+#define BUFSIZE 1024
+
+int main()
+{
+ char buf[BUFSIZE];
+
+ StateChart atoi;
+ atoi.init();
+ while ( fgets( buf, sizeof(buf), stdin ) != 0 ) {
+ atoi.execute( buf, strlen(buf) );
+ }
+ if ( atoi.finish() <= 0 )
+ cerr << "statechart: error: parsing input" << endl;
+ return 0;
+}
diff --git a/examples/uri/uri.rl b/examples/uri/uri.rl
new file mode 100644
index 00000000..185a76c6
--- /dev/null
+++ b/examples/uri/uri.rl
@@ -0,0 +1,31 @@
+%%{
+ machine uri;
+
+ action scheme {}
+ action loc {}
+ action item {}
+ action query {}
+ action last {}
+ action nothing {}
+
+ main :=
+ # Scheme machine. This is ambiguous with the item machine. We commit
+ # to the scheme machine on colon.
+ ( [^:/?#]+ ':' @(colon,1) @scheme )?
+
+ # Location machine. This is ambiguous with the item machine. We remain
+ # ambiguous until a second slash, at that point and all points after
+ # we place a higher priority on staying in the location machine over
+ # moving into the item machine.
+ ( ( '/' ( '/' [^/?#]* ) $(loc,1) ) %loc %/loc )?
+
+ # Item machine. Ambiguous with both scheme and location, which both
+ # get a higher priority on the characters causing ambiguity.
+ ( ( [^?#]+ ) $(loc,0) $(colon,0) %item %/item )?
+
+ # Last two components, the characters that initiate these machines are
+ # not supported in any previous components, therefore there are no
+ # ambiguities introduced by these parts.
+ ( '?' [^#]* %query %/query)?
+ ( '#' any* %/last )?;
+}%%