summaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@colm.net>2019-09-11 18:22:31 -0600
committerAdrian Thurston <thurston@colm.net>2019-09-11 18:22:31 -0600
commite4f23077edf61818128b355f2aab2b900702ea97 (patch)
tree05d3294062b259a3d72e277950e1364c50bbea07 /examples
parentbccaa853593339c2bac8ddede25f18e1afc91597 (diff)
downloadcolm-e4f23077edf61818128b355f2aab2b900702ea97.tar.gz
unifying some of the top-level components
including vim syntax, CREDITS, COPYING, examples, contrib.
Diffstat (limited to 'examples')
-rw-r--r--examples/.gitignore39
-rw-r--r--examples/CMakeLists.txt11
-rw-r--r--examples/Makefile.am72
-rw-r--r--examples/README40
-rw-r--r--examples/atoi.rl59
-rw-r--r--examples/awkemu.rl116
-rwxr-xr-xexamples/awkequiv.awk10
-rw-r--r--examples/clang.rl150
-rw-r--r--examples/concurrent.rl126
-rw-r--r--examples/cppscan.lex143
-rw-r--r--examples/cppscan.rec183
-rw-r--r--examples/cppscan.rl208
-rw-r--r--examples/format.rl191
-rw-r--r--examples/go/.gitignore5
-rw-r--r--examples/go/Makefile32
-rw-r--r--examples/go/README36
-rw-r--r--examples/go/atoi.rl89
-rw-r--r--examples/go/rpn.rl159
-rw-r--r--examples/go/url.rl414
-rw-r--r--examples/go/url_authority.rl165
-rw-r--r--examples/gotocallret.rl96
-rw-r--r--examples/mailbox.rl207
-rw-r--r--examples/params.rl102
-rw-r--r--examples/pullscan.rl170
-rw-r--r--examples/rlscan.rl300
-rw-r--r--examples/statechart.rl116
-rw-r--r--examples/uri.rl31
27 files changed, 3270 insertions, 0 deletions
diff --git a/examples/.gitignore b/examples/.gitignore
new file mode 100644
index 00000000..b309591b
--- /dev/null
+++ b/examples/.gitignore
@@ -0,0 +1,39 @@
+/Makefile.in
+/Makefile
+/concurrent
+/concurrent.exe
+/rlscan
+/rlscan.exe
+/clang
+/clang.exe
+/statechart
+/statechart.exe
+/gotocallret
+/gotocallret.exe
+/pullscan
+/pullscan.exe
+/cppscan
+/cppscan.exe
+/format
+/format.exe
+/awkemu
+/awkemu.exe
+/mailbox
+/mailbox.exe
+/atoi
+/atoi.exe
+/params
+/params.exe
+/statechart.cpp
+/gotocallret.cpp
+/clang.c
+/cppscan.cpp
+/mailbox.cpp
+/atoi.cpp
+/pullscan.c
+/concurrent.cpp
+/rlscan.cpp
+/params.c
+/format.c
+/awkemu.c
+/.deps
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 00000000..6ff75544
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,11 @@
+foreach(_example atoi awkemu clang concurrent cppscan format gotocallret
+ mailbox params rlscan statechart pullscan)
+ add_custom_command(
+ OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${_example}.cpp"
+ DEPENDS ${_example}.rl
+ COMMAND ragel
+ ARGS -G2 -o "${CMAKE_CURRENT_BINARY_DIR}/${_example}.cpp"
+ "${CMAKE_CURRENT_LIST_DIR}/${_example}.rl"
+ WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}")
+ add_executable(${_example} "${CMAKE_CURRENT_BINARY_DIR}/${_example}.cpp")
+endforeach()
diff --git a/examples/Makefile.am b/examples/Makefile.am
new file mode 100644
index 00000000..7cda0171
--- /dev/null
+++ b/examples/Makefile.am
@@ -0,0 +1,72 @@
+
+RAGEL = ../src/ragel
+FLEX = flex
+RE2C = re2c
+
+noinst_PROGRAMS = \
+ atoi concurrent cppscan format gotocallret mailbox params \
+ statechart
+
+EXTRA_DIST = \
+ gotocallret.rl pullscan.rl concurrent.rl rlscan.rl statechart.rl \
+ params.rl clang.rl cppscan.rl format.rl awkemu.rl mailbox.rl atoi.rl
+
+gotocallret_SOURCES = gotocallret.cpp
+pullscan_SOURCES = pullscan.c
+concurrent_SOURCES = concurrent.cpp
+rlscan_SOURCES = rlscan.cpp
+statechart_SOURCES = statechart.cpp
+params_SOURCES = params.c
+clang_SOURCES = clang.c
+cppscan_SOURCES = cppscan.cpp
+format_SOURCES = format.c
+awkemu_SOURCES = awkemu.c
+mailbox_SOURCES = mailbox.cpp
+atoi_SOURCES = atoi.cpp
+
+gotocallret.cpp: gotocallret.rl
+ $(RAGEL) -G2 -o gotocallret.cpp gotocallret.rl
+
+pullscan.c: pullscan.rl $(RAGEL)
+ $(RAGEL) -G2 -o $@ pullscan.rl
+
+concurrent.cpp: concurrent.rl $(RAGEL)
+ $(RAGEL) -G2 -o concurrent.cpp concurrent.rl
+
+rlscan.cpp: rlscan.rl
+ $(RAGEL) -G2 -o rlscan.cpp rlscan.rl
+
+statechart.cpp: statechart.rl
+ $(RAGEL) -G2 -o statechart.cpp statechart.rl
+
+params.c: params.rl
+ $(RAGEL) -G2 -o params.c params.rl
+
+clang.c: clang.rl
+ $(RAGEL) -G2 -o clang.c clang.rl
+
+cppscan.cpp: cppscan.rl
+ $(RAGEL) -G2 -o $@ cppscan.rl
+
+format.c: format.rl
+ $(RAGEL) -G2 -o format.c format.rl
+
+awkemu.c: awkemu.rl
+ $(RAGEL) -G2 -o awkemu.c awkemu.rl
+
+mailbox.cpp: mailbox.rl
+ $(RAGEL) -G2 -o mailbox.cpp mailbox.rl
+
+atoi.cpp: atoi.rl
+ $(RAGEL) -G2 -o atoi.cpp atoi.rl
+
+###
+
+lex-cppscan.cpp: cppscan.lex
+ $(FLEX) -f -o $@ $<
+
+re2c-cppscan.cpp: cppscan.rec
+ $(RE2C) -s $< > $@
+
+example.cpp: example.rec
+ $(RE2C) -s $< > $@
diff --git a/examples/README b/examples/README
new file mode 100644
index 00000000..12773cb3
--- /dev/null
+++ b/examples/README
@@ -0,0 +1,40 @@
+
+ Ragel State Machine Compiler -- Examples
+ ========================================
+
+atoi -- Converts a string to an integer.
+
+awkemu -- Performs the basic parsing that the awk program performs on input.
+ The awk equivalent to awkemu is in awkequiv.awk
+
+clang -- A scanner for a simple C like language. It breaks input up into
+ words, numbers, strings and symbols and strips out whitespace
+ and comments. It is a suitable template for writing a parser
+ that finds a sequence of tokens.
+
+concurrent -- Demonstrates the ability of ragel to produce parsers that
+ perform independent tasks concurrently.
+
+cppscan -- A C++ scanner that uses the longest match scanning method. This
+ example differs from other examples of scanning. Each run of the
+ state machine matches one token. This method results in a
+ smaller state machine since the final kleene star is omitted and
+ therefore every state does not need to get all the transitions
+ of the start state.
+
+format -- Partial printf implementation.
+
+gotocallret -- Demonstrate the use of fgoto, fcall and fret.
+
+mailbox -- Parses unix mailbox files. It breaks files into messages, and
+ messages into headers and body. It demonstrates Ragel's ability
+ to make parsers for structured file formats.
+
+params -- Parses command line arguments.
+
+rlscan -- Lexes Ragel input files.
+
+statechart -- Demonstrate the use of labels, the epsilon operator, and the
+ join operator for creating machines using the named state and
+ transition list paradigm. This implements the same machine as
+ the atoi example.
diff --git a/examples/atoi.rl b/examples/atoi.rl
new file mode 100644
index 00000000..7164b68d
--- /dev/null
+++ b/examples/atoi.rl
@@ -0,0 +1,59 @@
+/*
+ * Convert a string to an integer.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+%%{
+ machine atoi;
+ write data;
+}%%
+
+long long atoi( char *str )
+{
+ char *p = str, *pe = str + strlen( str );
+ int cs;
+ long long val = 0;
+ bool neg = false;
+
+ %%{
+ action see_neg {
+ neg = true;
+ }
+
+ action add_digit {
+ val = val * 10 + (fc - '0');
+ }
+
+ main :=
+ ( '-'@see_neg | '+' )? ( digit @add_digit )+
+ '\n';
+
+ # Initialize and execute.
+ write init;
+ write exec;
+ }%%
+
+ if ( neg )
+ val = -1 * val;
+
+ if ( cs < atoi_first_final )
+ fprintf( stderr, "atoi: there was an error\n" );
+
+ return val;
+};
+
+
+#define BUFSIZE 1024
+
+int main()
+{
+ char buf[BUFSIZE];
+ while ( fgets( buf, sizeof(buf), stdin ) != 0 ) {
+ long long value = atoi( buf );
+ printf( "%lld\n", value );
+ }
+ return 0;
+}
diff --git a/examples/awkemu.rl b/examples/awkemu.rl
new file mode 100644
index 00000000..6615943d
--- /dev/null
+++ b/examples/awkemu.rl
@@ -0,0 +1,116 @@
+/*
+ * Perform the basic line parsing of input performed by awk.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+%%{
+ machine awkemu;
+
+ action start_word {
+ ws[nwords] = fpc;
+ }
+
+ action end_word {
+ we[nwords++] = fpc;
+ }
+
+ action start_line {
+ nwords = 0;
+ ls = fpc;
+ }
+
+ action end_line {
+ printf("endline(%i): ", nwords );
+ fwrite( ls, 1, p - ls, stdout );
+ printf("\n");
+
+ for ( i = 0; i < nwords; i++ ) {
+ printf(" word: ");
+ fwrite( ws[i], 1, we[i] - ws[i], stdout );
+ printf("\n");
+ }
+ }
+
+ # Words in a line.
+ word = ^[ \t\n]+;
+
+ # The whitespace separating words in a line.
+ whitespace = [ \t];
+
+ # The components in a line to break up. Either a word or a single char of
+ # whitespace. On the word capture characters.
+ blineElements = word >start_word %end_word | whitespace;
+
+ # Star the break line elements. Just be careful to decrement the leaving
+ # priority as we don't want multiple character identifiers to be treated as
+ # multiple single char identifiers.
+ line = ( blineElements** '\n' ) >start_line @end_line;
+
+ # Any number of lines.
+ main := line*;
+}%%
+
+%% write data noerror nofinal;
+
+#define MAXWORDS 256
+#define BUFSIZE 4096
+char buf[BUFSIZE];
+
+int main()
+{
+ int i, nwords = 0;
+ char *ls = 0;
+ char *ws[MAXWORDS];
+ char *we[MAXWORDS];
+
+ int cs;
+ int have = 0;
+
+ %% write init;
+
+ while ( 1 ) {
+ char *p, *pe, *data = buf + have;
+ int len, space = BUFSIZE - have;
+ /* fprintf( stderr, "space: %i\n", space ); */
+
+ if ( space == 0 ) {
+ fprintf(stderr, "buffer out of space\n");
+ exit(1);
+ }
+
+ len = fread( data, 1, space, stdin );
+ /* fprintf( stderr, "len: %i\n", len ); */
+ if ( len == 0 )
+ break;
+
+ /* Find the last newline by searching backwards. This is where
+ * we will stop processing on this iteration. pe ends one past that
+ * newline, or at buf if there is none, and never reads below buf. */
+ p = buf;
+ pe = buf + have + len;
+ while ( pe > buf && pe[-1] != '\n' )
+ pe--;
+
+ /* fprintf( stderr, "running on: %i\n", pe - p ); */
+
+ %% write exec;
+
+ /* How much is still in the buffer. */
+ have = data + len - pe;
+ if ( have > 0 )
+ memmove( buf, pe, have );
+
+ /* fprintf(stderr, "have: %i\n", have ); */
+
+ if ( len < space )
+ break;
+ }
+
+ if ( have > 0 )
+ fprintf(stderr, "input not newline terminated\n");
+ return 0;
+}
diff --git a/examples/awkequiv.awk b/examples/awkequiv.awk
new file mode 100755
index 00000000..9877dd36
--- /dev/null
+++ b/examples/awkequiv.awk
@@ -0,0 +1,10 @@
+#!/usr/bin/awk -f
+#
+
+
+{
+ print "endline(" NF "): " $0
+ for ( i = 1; i <= NF; i++ ) {
+ print " word: " $i
+ }
+}
diff --git a/examples/clang.rl b/examples/clang.rl
new file mode 100644
index 00000000..60491e5e
--- /dev/null
+++ b/examples/clang.rl
@@ -0,0 +1,150 @@
+/*
+ * A mini C-like language scanner.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+%%{
+ machine clang;
+
+ newline = '\n' @{curline += 1;};
+ any_count_line = any | newline;
+
+ # Consume a C comment.
+ c_comment := any_count_line* :>> '*/' @{fgoto main;};
+
+ main := |*
+
+ # Alphanumeric characters or underscore.
+ alnum_u = alnum | '_';
+
+ # Alpha characters or underscore.
+ alpha_u = alpha | '_';
+
+ # Symbols. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving dump the symbol.
+ ( punct - [_'"] ) {
+ printf( "symbol(%i): %c\n", curline, ts[0] );
+ };
+
+ # Identifier. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving, dump the identifier.
+ alpha_u alnum_u* {
+ printf( "ident(%i): ", curline );
+ fwrite( ts, 1, te-ts, stdout );
+ printf("\n");
+ };
+
+ # Single Quote.
+ sliteralChar = [^'\\] | newline | ( '\\' . any_count_line );
+ '\'' . sliteralChar* . '\'' {
+ printf( "single_lit(%i): ", curline );
+ fwrite( ts, 1, te-ts, stdout );
+ printf("\n");
+ };
+
+ # Double Quote.
+ dliteralChar = [^"\\] | newline | ( '\\' any_count_line );
+ '"' . dliteralChar* . '"' {
+ printf( "double_lit(%i): ", curline );
+ fwrite( ts, 1, te-ts, stdout );
+ printf("\n");
+ };
+
+ # Whitespace is standard ws, newlines and control codes.
+ any_count_line - 0x21..0x7e;
+
+ # Describe both c style comments and c++ style comments. The
+ # priority bump on the terminator of the comments brings us
+ # out of the extend* which matches everything.
+ '//' [^\n]* newline;
+
+ '/*' { fgoto c_comment; };
+
+ # Match an integer. We don't bother clearing the buf or filling it.
+ # The float machine overlaps with int and it will do it.
+ digit+ {
+ printf( "int(%i): ", curline );
+ fwrite( ts, 1, te-ts, stdout );
+ printf("\n");
+ };
+
+ # Match a float. Upon entering the machine clear the buf, buffer
+ # characters on every trans and dump the float upon leaving.
+ digit+ '.' digit+ {
+ printf( "float(%i): ", curline );
+ fwrite( ts, 1, te-ts, stdout );
+ printf("\n");
+ };
+
+ # Match a hex. Upon entering the hex part, clear the buf, buffer characters
+ # on every trans and dump the hex on leaving transitions.
+ '0x' xdigit+ {
+ printf( "hex(%i): ", curline );
+ fwrite( ts, 1, te-ts, stdout );
+ printf("\n");
+ };
+
+ *|;
+}%%
+
+%% write data nofinal;
+
+#define BUFSIZE 128
+
+void scanner()
+{
+ static char buf[BUFSIZE];
+ int cs, act, have = 0, curline = 1;
+ char *ts, *te = 0;
+ int done = 0;
+
+ %% write init;
+
+ while ( !done ) {
+ char *p = buf + have, *pe, *eof = 0;
+ int len, space = BUFSIZE - have;
+
+ if ( space == 0 ) {
+ /* We've used up the entire buffer storing an already-parsed token
+ * prefix that must be preserved. */
+ fprintf(stderr, "OUT OF BUFFER SPACE\n" );
+ exit(1);
+ }
+
+ len = fread( p, 1, space, stdin );
+ pe = p + len;
+
+ /* Check if this is the end of file. */
+ if ( len < space ) {
+ eof = pe;
+ done = 1;
+ }
+
+ %% write exec;
+
+ if ( cs == clang_error ) {
+ fprintf(stderr, "PARSE ERROR\n" );
+ break;
+ }
+
+ if ( ts == 0 )
+ have = 0;
+ else {
+ /* There is a prefix to preserve, shift it over. */
+ have = pe - ts;
+ memmove( buf, ts, have );
+ te = buf + (te-ts);
+ ts = buf;
+ }
+ }
+}
+
+int main()
+{
+ scanner();
+ return 0;
+}
+
diff --git a/examples/concurrent.rl b/examples/concurrent.rl
new file mode 100644
index 00000000..224f9601
--- /dev/null
+++ b/examples/concurrent.rl
@@ -0,0 +1,126 @@
+/*
+ * Show off concurrent abilities.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace std;
+
+#define BUFSIZE 2048
+
+struct Concurrent
+{
+ int cur_char;
+ int start_word;
+ int start_comment;
+ int start_literal;
+
+ int cs;
+
+ int init( );
+ int execute( const char *data, int len, bool isEof );
+ int finish( );
+};
+
+%%{
+ machine Concurrent;
+
+ action next_char {
+ cur_char += 1;
+ }
+
+ action start_word {
+ start_word = cur_char;
+ }
+ action end_word {
+ cout << "word: " << start_word <<
+ " " << cur_char-1 << endl;
+ }
+
+ action start_comment {
+ start_comment = cur_char;
+ }
+ action end_comment {
+ cout << "comment: " << start_comment <<
+ " " << cur_char-1 << endl;
+ }
+
+ action start_literal {
+ start_literal = cur_char;
+ }
+ action end_literal {
+ cout << "literal: " << start_literal <<
+ " " << cur_char-1 << endl;
+ }
+
+ # Count characters.
+ chars = ( any @next_char )*;
+
+ # Words are non-whitespace.
+ word = ( any-space )+ >start_word %end_word;
+ words = ( ( word | space ) $1 %0 )*;
+
+ # Finds C style comments.
+ comment = ( '/*' any* :>> '*/' ) >start_comment %end_comment;
+ comments = ( comment | any )**;
+
+ # Finds single quoted strings.
+ literalChar = ( any - ['\\] ) | ( '\\' . any );
+ literal = ('\'' literalChar* '\'' ) >start_literal %end_literal;
+ literals = ( ( literal | (any-'\'') ) $1 %0 )*;
+
+ main := chars | words | comments | literals;
+}%%
+
+%% write data;
+
+int Concurrent::init( )
+{
+ %% write init;
+ cur_char = 0;
+ return 1;
+}
+
+int Concurrent::execute( const char *data, int len, bool isEof )
+{
+ const char *p = data;
+ const char *pe = data + len;
+ const char *eof = isEof ? pe : 0;
+
+ %% write exec;
+
+ if ( cs == Concurrent_error )
+ return -1;
+ if ( cs >= Concurrent_first_final )
+ return 1;
+ return 0;
+}
+
+int Concurrent::finish( )
+{
+ if ( cs == Concurrent_error )
+ return -1;
+ if ( cs >= Concurrent_first_final )
+ return 1;
+ return 0;
+}
+
+Concurrent concurrent;
+char buf[BUFSIZE];
+
+int main()
+{
+ concurrent.init();
+ while ( 1 ) {
+ int len = fread( buf, 1, BUFSIZE, stdin );
+ concurrent.execute( buf, len, len != BUFSIZE );
+ if ( len != BUFSIZE )
+ break;
+ }
+
+ if ( concurrent.finish() <= 0 )
+ cerr << "concurrent: error parsing input" << endl;
+ return 0;
+}
diff --git a/examples/cppscan.lex b/examples/cppscan.lex
new file mode 100644
index 00000000..fb662538
--- /dev/null
+++ b/examples/cppscan.lex
@@ -0,0 +1,143 @@
+/*
+ * flex equivalent to cppscan.rl
+ */
+
+%{
+
+#include <stdio.h>
+
+#define TK_Dlit 256
+#define TK_Slit 257
+#define TK_Float 258
+#define TK_Id 259
+#define TK_NameSep 260
+#define TK_Arrow 261
+#define TK_PlusPlus 262
+#define TK_MinusMinus 263
+#define TK_ArrowStar 264
+#define TK_DotStar 265
+#define TK_ShiftLeft 266
+#define TK_ShiftRight 267
+#define TK_IntegerDecimal 268
+#define TK_IntegerOctal 269
+#define TK_IntegerHex 270
+#define TK_EqualsEquals 271
+#define TK_NotEquals 272
+#define TK_AndAnd 273
+#define TK_OrOr 274
+#define TK_MultAssign 275
+#define TK_DivAssign 276
+#define TK_PercentAssign 277
+#define TK_PlusAssign 278
+#define TK_MinusAssign 279
+#define TK_AmpAssign 280
+#define TK_CaretAssign 281
+#define TK_BarAssign 282
+#define TK_DotDotDot 283
+#define TK_Whitespace 284
+#define TK_Comment 285
+
+int line = 1, col = 1;
+
+void token( int tok, char *data, int len )
+{
+ printf( "<%i> ", tok );
+ for ( int i = 0; i < len; i++ )
+ fputc( data[i], stdout );
+ fputc( '\n', stdout );
+
+ /* Count newlines and columns. This code is here mainly for having some
+ * code in the token routine when commenting out the above output during
+ * performance testing. */
+ for ( int i = 0; i < len; i ++ ) {
+ if ( data[i] == '\n' ) {
+ line += 1;
+ col = 1;
+ }
+ else {
+ col += 1;
+ }
+ }
+}
+
+
+%}
+
+%x COMMENT
+
+FRACT_CONST [0-9]*\.[0-9]+|[0-9]+\.
+EXPONENT [eE][+\-]?[0-9]+
+FLOAT_SUFFIX [flFL]
+
+%%
+
+ /* Single and double literals. */
+L?\'([^\'\\\n]|\\.)*\' {
+ token( TK_Slit, yytext, yyleng );
+}
+
+L?\"([^\"\\\n]|\\.)*\" {
+ token( TK_Dlit, yytext, yyleng );
+}
+
+[a-zA-Z_][a-zA-Z0-9_]* {
+ token( TK_Id, yytext, yyleng );
+}
+
+{FRACT_CONST}{EXPONENT}?{FLOAT_SUFFIX}?|[0-9]+{EXPONENT}{FLOAT_SUFFIX}? {
+ token( TK_Float, yytext, yyleng );
+}
+
+(0|[1-9][0-9]*)[ulUL]{0,3} {
+ token( TK_IntegerDecimal, yytext, yyleng );
+}
+
+0[0-9]+[ulUL]{0,2} {
+ token( TK_IntegerOctal, yytext, yyleng );
+}
+
+0x[0-9a-fA-F]+[ulUL]{0,2} {
+ token( TK_IntegerHex, yytext, yyleng );
+}
+
+:: token( TK_NameSep, yytext, yyleng );
+== token( TK_EqualsEquals, yytext, yyleng );
+!= token( TK_NotEquals, yytext, yyleng );
+&& token( TK_AndAnd, yytext, yyleng );
+\|\| token( TK_OrOr, yytext, yyleng );
+\*= token( TK_MultAssign, yytext, yyleng );
+\/= token( TK_DivAssign, yytext, yyleng );
+%= token( TK_PercentAssign, yytext, yyleng );
+\+= token( TK_PlusAssign, yytext, yyleng );
+-= token( TK_MinusAssign, yytext, yyleng );
+&= token( TK_AmpAssign, yytext, yyleng );
+^= token( TK_CaretAssign, yytext, yyleng );
+\|= token( TK_BarAssign, yytext, yyleng );
+\+\+ token( TK_PlusPlus, yytext, yyleng );
+-- token( TK_MinusMinus, yytext, yyleng );
+-> token( TK_Arrow, yytext, yyleng );
+->\* token( TK_ArrowStar, yytext, yyleng );
+\.\* token( TK_DotStar, yytext, yyleng );
+\.\.\. token( TK_DotDotDot, yytext, yyleng );
+
+\/\* BEGIN(COMMENT);
+<COMMENT>\*\/ BEGIN(INITIAL);
+<COMMENT>(.|\n) { }
+
+\/\/.*\n {}
+[^!-~]+ {}
+
+[!-/:-@\[-`{-~] token( yytext[0], yytext, yyleng );
+
+%%
+
+int yywrap()
+{
+ /* Once the input is done, no more. */
+ return 1;
+}
+
+int main()
+{
+ yylex();
+}
diff --git a/examples/cppscan.rec b/examples/cppscan.rec
new file mode 100644
index 00000000..43f297d8
--- /dev/null
+++ b/examples/cppscan.rec
@@ -0,0 +1,183 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define TK_Dlit 256
+#define TK_Slit 257
+#define TK_Float 258
+#define TK_Id 259
+#define TK_NameSep 260
+#define TK_Arrow 261
+#define TK_PlusPlus 262
+#define TK_MinusMinus 263
+#define TK_ArrowStar 264
+#define TK_DotStar 265
+#define TK_ShiftLeft 266
+#define TK_ShiftRight 267
+#define TK_IntegerDecimal 268
+#define TK_IntegerOctal 269
+#define TK_IntegerHex 270
+#define TK_EqualsEquals 271
+#define TK_NotEquals 272
+#define TK_AndAnd 273
+#define TK_OrOr 274
+#define TK_MultAssign 275
+#define TK_DivAssign 276
+#define TK_PercentAssign 277
+#define TK_PlusAssign 278
+#define TK_MinusAssign 279
+#define TK_AmpAssign 280
+#define TK_CaretAssign 281
+#define TK_BarAssign 282
+#define TK_DotDotDot 283
+#define TK_Whitespace 284
+#define TK_Comment 285
+
+int line = 1, col = 1;
+
+void token( int tok, char *data, int len )
+{
+ printf( "<%i> ", tok );
+ for ( int i = 0; i < len; i++ )
+ fputc( data[i], stdout );
+ fputc( '\n', stdout );
+
+ /* Count newlines and columns. This code is here mainly for having some
+ * code in the token routine when commenting out the above output during
+ * performance testing. */
+ for ( int i = 0; i < len; i ++ ) {
+ if ( data[i] == '\n' ) {
+ line += 1;
+ col = 1;
+ }
+ else {
+ col += 1;
+ }
+ }
+}
+
+#define BUFSIZE 8192
+char buf[BUFSIZE];
+
+void fill( int n )
+{
+ printf("fill(%i)\n", n);
+ exit(1);
+}
+
+int main()
+{
+ char *start, *p = buf, *lim = buf, *marker;
+ int len, have, want, shift;
+ int done = 0;
+
+#define YYCTYPE char
+
+#define YYCURSOR p
+#define YYLIMIT lim
+#define YYMARKER marker
+
+#define YYFILL(n) { \
+ if ( ! done ) { \
+ have = lim-start; \
+ if ( start > buf ) { \
+ shift = start-buf; \
+ memmove( buf, start, have ); \
+ start -= shift; \
+ p -= shift; \
+ lim -= shift; \
+ marker -= shift; \
+ } \
+ want = BUFSIZE - have - 1; \
+ len = fread( lim, 1, want, stdin ); \
+ lim += len; \
+ if ( len < want ) { \
+ *lim++ = 0; \
+ done = 1; \
+ } \
+ } \
+ }
+
+again:
+ start = p;
+
+/*!re2c
+
+ANY = [\000-\377];
+FRACTCONST = ( [0-9]* "." [0-9]+ ) | [0-9]+ ".";
+EXPONENT = [eE] [+\-]? [0-9]+;
+FLOATSUFFIX = [flFL];
+
+ "L"? "\'" ( ANY \ [\'\\\n] | "\\" ANY )* "\'" {
+ token( TK_Slit, start, p-start );
+ goto again;
+ }
+
+ "L"? "\"" ( ANY \ [\"\\\n] | "\\" ANY )* "\"" {
+ token( TK_Dlit, start, p-start );
+ goto again;
+ }
+
+ [a-zA-Z_][a-zA-Z0-9_]* {
+ token( TK_Id, start, p-start );
+ goto again;
+ }
+
+ ( FRACTCONST EXPONENT? FLOATSUFFIX? ) | ( [0-9]+ EXPONENT FLOATSUFFIX? ) {
+ token( TK_Float, start, p-start );
+ goto again;
+ }
+
+
+ ( "0" | [1-9][0-9]* ) [ulUL]* {
+ token( TK_IntegerDecimal, start, p-start );
+ goto again;
+ }
+
+ "0" [0-9]+ [ulUL]* {
+ token( TK_IntegerOctal, start, p-start );
+ goto again;
+ }
+
+ "0x" [0-9a-fA-F]+[ulUL]* {
+ token( TK_IntegerHex, start, p-start );
+ goto again;
+ }
+
+ "::" { token( TK_NameSep, start, p-start ); goto again; }
+ "==" { token( TK_EqualsEquals, start, p-start ); goto again; }
+ "!=" { token( TK_NotEquals, start, p-start ); goto again; }
+ "&&" { token( TK_AndAnd, start, p-start ); goto again; }
+ "||" { token( TK_OrOr, start, p-start ); goto again; }
+ "*=" { token( TK_MultAssign, start, p-start ); goto again; }
+ "/=" { token( TK_DivAssign, start, p-start ); goto again; }
+ "%=" { token( TK_PercentAssign, start, p-start ); goto again; }
+ "+=" { token( TK_PlusAssign, start, p-start ); goto again; }
+ "-=" { token( TK_MinusAssign, start, p-start ); goto again; }
+ "&=" { token( TK_AmpAssign, start, p-start ); goto again; }
+ "^=" { token( TK_CaretAssign, start, p-start ); goto again; }
+ "|=" { token( TK_BarAssign, start, p-start ); goto again; }
+ "++" { token( TK_PlusPlus, start, p-start ); goto again; }
+ "--" { token( TK_MinusMinus, start, p-start ); goto again; }
+ "->" { token( TK_Arrow, start, p-start ); goto again; }
+ "->*" { token( TK_ArrowStar, start, p-start ); goto again; }
+ ".*" { token( TK_DotStar, start, p-start ); goto again; }
+ "..." { token( TK_DotDotDot, start, p-start ); goto again; }
+
+ "/*" { goto comment; }
+ "//" (ANY\"\n")* "\n" { goto again; }
+ [\001-\040\177]+ { goto again; }
+
+ [\041-\057\072-\100\133-\140\173-\176] {
+ token( *start, start, p-start );
+ goto again;
+ }
+ "\000" { return 0; }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto again; }
+ ANY { goto comment; }
+*/
+}
diff --git a/examples/cppscan.rl b/examples/cppscan.rl
new file mode 100644
index 00000000..1ead5aa6
--- /dev/null
+++ b/examples/cppscan.rl
@@ -0,0 +1,208 @@
+/*
+ * A C++ scanner. Uses the longest match construction.
+ * << <= <<= >> >= >>= are left out since angle brackets are used in templates.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <iostream>
+
+#define TK_Dlit 256
+#define TK_Slit 257
+#define TK_Float 258
+#define TK_Id 259
+#define TK_NameSep 260
+#define TK_Arrow 261
+#define TK_PlusPlus 262
+#define TK_MinusMinus 263
+#define TK_ArrowStar 264
+#define TK_DotStar 265
+#define TK_ShiftLeft 266
+#define TK_ShiftRight 267
+#define TK_IntegerDecimal 268
+#define TK_IntegerOctal 269
+#define TK_IntegerHex 270
+#define TK_EqualsEquals 271
+#define TK_NotEquals 272
+#define TK_AndAnd 273
+#define TK_OrOr 274
+#define TK_MultAssign 275
+#define TK_DivAssign 276
+#define TK_PercentAssign 277
+#define TK_PlusAssign 278
+#define TK_MinusAssign 279
+#define TK_AmpAssign 280
+#define TK_CaretAssign 281
+#define TK_BarAssign 282
+#define TK_DotDotDot 283
+#define TK_Whitespace 284
+#define TK_Comment 285
+
+#define BUFSIZE 16384
+
+/* EOF char used to flush out that last token. This should be a whitespace
+ * token. */
+
+#define LAST_CHAR 0
+
+using std::cerr;
+using std::cout;
+using std::cin;
+using std::endl;
+
+static char buf[BUFSIZE];
+static int line = 1, col = 1;
+static char *ts, *te;
+static int act, have = 0;
+static int cs;
+
+%%{
+ machine Scanner;
+ write data nofinal;
+
+ # Floating literals.
+ fract_const = digit* '.' digit+ | digit+ '.';
+ exponent = [eE] [+\-]? digit+;
+ float_suffix = [flFL];
+
+ c_comment :=
+ any* :>> '*/'
+ @{ fgoto main; };
+
+ main := |*
+
+ # Single and double literals.
+ ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" )
+ {token( TK_Slit );};
+ ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' )
+ {token( TK_Dlit );};
+
+ # Identifiers
+ ( [a-zA-Z_] [a-zA-Z0-9_]* )
+ {token( TK_Id );};
+
+ # Floating literals.
+ ( fract_const exponent? float_suffix? | digit+ exponent float_suffix? )
+ {token( TK_Float );};
+
+ # Integer decimal. Leading part buffered by float.
+ ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} )
+ {token( TK_IntegerDecimal );};
+
+ # Integer octal. Leading part buffered by float.
+ ( '0' [0-9]+ [ulUL]{0,2} )
+ {token( TK_IntegerOctal );};
+
+ # Integer hex. Leading 0 buffered by float.
+ ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) )
+ {token( TK_IntegerHex );};
+
+ # Only buffer the second item, first buffered by symbol.
+ '::' {token( TK_NameSep );};
+ '==' {token( TK_EqualsEquals );};
+ '!=' {token( TK_NotEquals );};
+ '&&' {token( TK_AndAnd );};
+ '||' {token( TK_OrOr );};
+ '*=' {token( TK_MultAssign );};
+ '/=' {token( TK_DivAssign );};
+ '%=' {token( TK_PercentAssign );};
+ '+=' {token( TK_PlusAssign );};
+ '-=' {token( TK_MinusAssign );};
+ '&=' {token( TK_AmpAssign );};
+ '^=' {token( TK_CaretAssign );};
+ '|=' {token( TK_BarAssign );};
+ '++' {token( TK_PlusPlus );};
+ '--' {token( TK_MinusMinus );};
+ '->' {token( TK_Arrow );};
+ '->*' {token( TK_ArrowStar );};
+ '.*' {token( TK_DotStar );};
+
+ # Three char compounds, first item already buffered.
+ '...' {token( TK_DotDotDot );};
+
+ # Single char symbols.
+ ( punct - [_"'] ) {token( ts[0] );};
+
+ # Comments and whitespace.
+ '/*' { fgoto c_comment; };
+ '//' [^\n]* '\n';
+ ( any - 33..126 )+;
+
+ *|;
+}%%
+
+void token( int tok )
+{
+ char *data = ts;
+ int len = te - ts;
+
+ cout << '<' << tok << "> ";
+ cout.write( data, len );
+ cout << '\n';
+
+ /* Count newlines and columns. This code is here mainly for having some
+ * code in the token routine when commenting out the above output during
+ * performance testing. */
+ for ( int i = 0; i < len; i ++ ) {
+ if ( data[i] == '\n' ) {
+ line += 1;
+ col = 1;
+ }
+ else {
+ col += 1;
+ }
+ }
+}
+
+int main()
+{
+ std::ios::sync_with_stdio(false);
+
+ %% write init;
+
+ /* Do the first read. */
+ bool done = false;
+ while ( !done ) {
+ char *p = buf + have;
+ int space = BUFSIZE - have;
+
+ if ( space == 0 ) {
+ /* We filled up the buffer trying to scan a token. */
+ cerr << "OUT OF BUFFER SPACE" << endl;
+ exit(1);
+ }
+
+ cin.read( p, space );
+ int len = cin.gcount();
+ char *pe = p + len;
+ char *eof = 0;
+
+ /* If we see eof then set the eof pointer so the last token is flushed. */
+ if ( cin.eof() ) {
+ eof = pe;
+ done = true;
+ }
+
+ %% write exec;
+
+ /* Check if we failed. */
+ if ( cs == Scanner_error ) {
+ /* Machine failed before finding a token. */
+ cerr << "PARSE ERROR" << endl;
+ exit(1);
+ }
+
+ /* Now set up the prefix. */
+ if ( ts == 0 )
+ have = 0;
+ else {
+ /* There is data that needs to be shifted over. */
+ have = pe - ts;
+ memmove( buf, ts, have );
+ te -= (ts-buf);
+ ts = buf;
+ }
+ }
+
+ return 0;
+}
diff --git a/examples/format.rl b/examples/format.rl
new file mode 100644
index 00000000..f8a37beb
--- /dev/null
+++ b/examples/format.rl
@@ -0,0 +1,191 @@
+/*
+ * Partial printf implementation.
+ */
+
+#define BUFLEN 1024
+#include <stdio.h>
+
+typedef void (*WriteFunc)( char *data, int len );
+
+struct format
+{
+ char buf[BUFLEN+1];
+ int buflen;
+ WriteFunc write;
+
+ int flags;
+ int width;
+ int prec;
+ int cs;
+};
+
+void do_conv( struct format *fsm, char c )
+{
+ printf( "flags: %x\n", fsm->flags );
+ printf( "width: %i\n", fsm->width );
+ printf( "prec: %i\n", fsm->prec );
+ printf( "conv: %c\n", c );
+ printf( "\n" );
+}
+
+#define FL_HASH 0x01
+#define FL_ZERO 0x02
+#define FL_DASH 0x04
+#define FL_SPACE 0x08
+#define FL_PLUS 0x10
+
+#define FL_HAS_WIDTH 0x0100
+#define FL_WIDTH_ARG 0x0200
+#define FL_HAS_PREC 0x0400
+#define FL_PREC_ARG 0x0800
+
+#define FL_LEN_H 0x010000
+#define FL_LEN_HH 0x020000
+#define FL_LEN_L 0x040000
+#define FL_LEN_LL 0x080000
+
+%%{
+ machine format;
+ access fsm->;
+
+ action clear {
+ fsm->flags = 0;
+ fsm->width = 0;
+ fsm->prec = 0;
+ }
+
+ # A non-zero number.
+ nznum = [1-9] [0-9]*;
+
+ # Width
+ action width_num { fsm->width = 10 * fsm->width + (fc-'0'); }
+ action width_arg { fsm->flags |= FL_WIDTH_ARG; }
+ action width { fsm->flags |= FL_HAS_WIDTH; }
+ width = ( ( nznum $width_num | '*' @width_arg ) %width )?;
+
+ # Precision
+ action prec_num { fsm->prec = 10 * fsm->prec + (fc-'0'); }
+ action prec_arg { fsm->flags |= FL_PREC_ARG; }
+ action prec { fsm->flags |= FL_HAS_PREC; }
+ precision = ( '.' ( digit* $prec_num %prec | '*' @prec_arg ) )?;
+
+ # Flags
+ action flags_hash { fsm->flags |= FL_HASH; }
+ action flags_zero { fsm->flags |= FL_ZERO; }
+ action flags_dash { fsm->flags |= FL_DASH; }
+ action flags_space { fsm->flags |= FL_SPACE; }
+ action flags_plus { fsm->flags |= FL_PLUS; }
+
+ flags = (
+ '#' @flags_hash |
+ '0' @flags_zero |
+ '-' @flags_dash |
+ ' ' @flags_space |
+ '+' @flags_plus )*;
+
+ action length_h { fsm->flags |= FL_LEN_H; }
+ action length_l { fsm->flags |= FL_LEN_L; }
+ action length_hh { fsm->flags |= FL_LEN_HH; }
+ action length_ll { fsm->flags |= FL_LEN_LL; }
+
+ # Must use leaving transitions on 'h' and 'l' because they are
+ # prefixes for 'hh' and 'll'.
+ length = (
+ 'h' %length_h |
+ 'l' %length_l |
+ 'hh' @length_hh |
+ 'll' @length_ll )?;
+
+ action conversion {
+ do_conv( fsm, fc );
+ }
+
+ conversion = [diouxXcsp] @conversion;
+
+ fmt_spec =
+ '%' @clear
+ flags
+ width
+ precision
+ length
+ conversion;
+
+ action emit {
+ if ( fsm->buflen == BUFLEN ) {
+ fsm->write( fsm->buf, fsm->buflen );
+ fsm->buflen = 0;
+ }
+ fsm->buf[fsm->buflen++] = fc;
+ }
+
+ action finish_ok {
+ if ( fsm->buflen > 0 )
+ fsm->write( fsm->buf, fsm->buflen );
+ }
+ action finish_err {
+ printf("EOF IN FORMAT\n");
+ }
+ action err_char {
+ printf("ERROR ON CHAR: 0x%x\n", fc );
+ }
+
+ main := (
+ [^%] @emit |
+ '%%' @emit |
+ fmt_spec
+ )* @/finish_err %/finish_ok $!err_char;
+}%%
+
+%% write data;
+
+void format_init( struct format *fsm )
+{
+ fsm->buflen = 0;
+ %% write init;
+}
+
+void format_execute( struct format *fsm, const char *data, int len, int isEof )
+{
+ const char *p = data;
+ const char *pe = data + len;
+ const char *eof = isEof ? pe : 0;
+
+ %% write exec;
+}
+
+int format_finish( struct format *fsm )
+{
+ if ( fsm->cs == format_error )
+ return -1;
+ if ( fsm->cs >= format_first_final )
+ return 1;
+ return 0;
+}
+
+
+#define INPUT_BUFSIZE 2048
+
+struct format fsm;
+char buf[INPUT_BUFSIZE];
+
+void write(char *data, int len )
+{
+ fwrite( data, 1, len, stdout );
+}
+
+int main()
+{
+ fsm.write = write;
+ format_init( &fsm );
+ while ( 1 ) {
+ int len = fread( buf, 1, INPUT_BUFSIZE, stdin );
+ int eof = len != INPUT_BUFSIZE;
+ format_execute( &fsm, buf, len, eof );
+ if ( eof )
+ break;
+ }
+ if ( format_finish( &fsm ) <= 0 )
+ printf("FAIL\n");
+ return 0;
+}
+
diff --git a/examples/go/.gitignore b/examples/go/.gitignore
new file mode 100644
index 00000000..f8b421d6
--- /dev/null
+++ b/examples/go/.gitignore
@@ -0,0 +1,5 @@
+/*.dot
+/*.go
+/atoi
+/rpn
+/url
diff --git a/examples/go/Makefile b/examples/go/Makefile
new file mode 100644
index 00000000..536afcc7
--- /dev/null
+++ b/examples/go/Makefile
@@ -0,0 +1,32 @@
+ragel = ragel
+
+check: atoi rpn url
+ ./atoi
+ ./rpn
+ ./url
+ @echo PASS
+
+graph: atoi.dot rpn.dot url.dot url_authority.dot
+ xdot atoi.dot
+ xdot rpn.dot
+ xdot url.dot
+ xdot url_authority.dot
+
+atoi: atoi.go
+atoi.go: atoi.rl
+atoi.dot: atoi.rl
+
+rpn: rpn.go
+rpn.go: rpn.rl
+rpn.dot: rpn.rl
+
+url: url.go url_authority.go
+url.go: url.rl
+url.dot: url.rl
+url_authority.go: url_authority.rl
+url_authority.dot: url_authority.rl
+
+clean: ; rm -f *.go *.dot atoi rpn url
+%: %.go ; go build -o $@ $^
+%.go: %.rl ; $(ragel) -Z -T0 -o $@ $<
+%.dot: %.rl ; $(ragel) -V -Z -p -o $@ $<
diff --git a/examples/go/README b/examples/go/README
new file mode 100644
index 00000000..bdb924b8
--- /dev/null
+++ b/examples/go/README
@@ -0,0 +1,36 @@
+.. -*-rst-*-
+
+Ragel Examples for Go
+=====================
+
+These examples serve the following purposes:
+
+- Help you learn Ragel
+- Test the correctness of the code I wrote for Ragel
+- Benchmark Ragel's performance on your machine
+- And hopefully give you some code you can steal ;]
+
+To get started you should first ``make install`` ragel. Then navigate
+to this directory and run::
+
+ make
+
+This automatically compiles, tests, and benchmarks these examples.
+
+The following examples are provided:
+
+- atoi.rl: Convert string to integer (very simple)
+- rpn.rl: Reverse polish notation calculator (simple)
+- url.rl: Very fast and robust HTTP/SIP URL parser (very complicated)
+
+To see graphviz diagrams of the state machines generated by Ragel in
+these examples, run the following commands::
+
+ sudo apt-get install xdot
+ make graph
+
+Those diagrams (along with the pdf manual) are super important for
+troubleshooting and simplifying your Ragel code.
+
+I truly hope these examples help you in your personal and professional
+endeavors. If you have any questions my email is: jtunney@gmail.com
diff --git a/examples/go/atoi.rl b/examples/go/atoi.rl
new file mode 100644
index 00000000..97c5163e
--- /dev/null
+++ b/examples/go/atoi.rl
@@ -0,0 +1,89 @@
+// -*-go-*-
+//
+// Convert a string to an integer.
+//
+// To compile:
+//
+// ragel -Z -T0 -o atoi.go atoi.rl
+// go build -o atoi atoi.go
+// ./atoi
+//
+// To show a diagram of your state machine:
+//
+// ragel -V -Z -p -o atoi.dot atoi.rl
+// xdot atoi.dot
+//
+
+package main
+
+import (
+ "os"
+ "fmt"
+)
+
+%%{
+ machine atoi;
+ write data;
+}%%
+
+// atoi converts a decimal string with an optional leading '-' or '+' and an
+// optional trailing newline into an int.
+//
+// If the machine does not stop in a final state, a diagnostic with a caret
+// under the stopping position is printed to stdout; the value accumulated up
+// to that point is still returned.
+func atoi(data string) (val int) {
+ cs, p, pe := 0, 0, len(data)
+ neg := false
+
+ %%{
+ action see_neg { neg = true }
+ action add_digit { val = val * 10 + (int(fc) - '0') }
+
+ main :=
+ ( '-'@see_neg | '+' )? ( digit @add_digit )+
+ '\n'?
+ ;
+
+ write init;
+ write exec;
+ }%%
+
+ // The digits are accumulated as a magnitude; apply the sign afterwards.
+ if neg {
+ val = -1 * val;
+ }
+
+ if cs < atoi_first_final {
+ fmt.Println("atoi: there was an error:", cs, "<", atoi_first_final)
+ fmt.Println(data)
+ // Indent the caret so it sits under position p of the echoed input.
+ for i := 0; i < p; i++ {
+ fmt.Print(" ")
+ }
+ fmt.Println("^")
+ }
+
+ return val
+}
+
+//////////////////////////////////////////////////////////////////////
+
+type atoiTest struct {
+ s string
+ v int
+}
+
+var atoiTests = []atoiTest{
+ atoiTest{"7", 7},
+ atoiTest{"666", 666},
+ atoiTest{"-666", -666},
+ atoiTest{"+666", 666},
+ atoiTest{"1234567890", 1234567890},
+ atoiTest{"+1234567890\n", 1234567890},
+ // atoiTest{"+ 1234567890", 1234567890}, // i will fail
+}
+
+// main runs every entry of atoiTests through atoi and exits with status 1
+// if any result differs from the expected value, 0 otherwise.
+func main() {
+	rc := 0
+	for _, test := range atoiTests {
+		// Keep the parsed value in its own variable. Declaring it with := under
+		// the same name as the exit status would shadow the status, and a
+		// failure recorded inside the loop would never reach os.Exit.
+		got := atoi(test.s)
+		if got != test.v {
+			fmt.Fprintf(os.Stderr, "FAIL atoi(%#v) != %#v\n", test.s, test.v)
+			rc = 1
+		}
+	}
+	os.Exit(rc)
+}
diff --git a/examples/go/rpn.rl b/examples/go/rpn.rl
new file mode 100644
index 00000000..2ad0a2db
--- /dev/null
+++ b/examples/go/rpn.rl
@@ -0,0 +1,159 @@
+// -*-go-*-
+//
+// Reverse Polish Notation Calculator
+// Copyright (c) 2010 J.A. Roberts Tunney
+// MIT License
+//
+// To compile:
+//
+// ragel -Z -T0 -o rpn.go rpn.rl
+// go build -o rpn rpn.go
+// ./rpn
+//
+// To show a diagram of your state machine:
+//
+// ragel -V -Z -p -o rpn.dot rpn.rl
+// xdot rpn.dot
+//
+
+package main
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "strconv"
+)
+
+// stack is a LIFO of ints. items is the backing storage and count is the
+// number of values currently held; only items[:count] is meaningful.
+type stack struct {
+	items []int
+	count int
+}
+
+// pop removes and returns the most recently pushed value. It panics when the
+// stack is empty, so callers must check count first.
+func (s *stack) pop() int {
+	s.count--
+	v := s.items[s.count]
+	return v
+}
+
+// push stores v on top of the stack. When the preallocated storage is full
+// the backing slice is grown instead of being indexed past its end.
+func (s *stack) push(v int) {
+	if s.count < len(s.items) {
+		s.items[s.count] = v
+	} else {
+		s.items = append(s.items, v)
+	}
+	s.count++
+}
+
+// abs returns v unchanged when it is non-negative and -v otherwise.
+func abs(v int) int {
+	if v >= 0 {
+		return v
+	}
+	return -v
+}
+
+%% machine rpn;
+%% write data;
+
+// rpn evaluates a reverse Polish notation expression and returns the value
+// left on top of the stack.
+//
+// Operands are runs of decimal digits. Operators are + - * / and the words
+// "add", "abs" and "abba" (which pushes 666). Every operand or operator must
+// be followed by whitespace. An error is returned when the input does not
+// match the grammar, when an operator finds too few operands, on division by
+// zero, when an operand cannot be converted to an int, or when the stack is
+// empty at the end.
+func rpn(data string) (res int, err error) {
+	// p, pe, eof := 0, len(data), len(data)
+	cs, p, pe := 0, 0, len(data)
+	mark := 0
+	st := &stack{items: make([]int, 128), count: 0}
+
+	// Returned from inside the actions so that malformed input produces an
+	// error instead of an index-out-of-range panic in stack.pop.
+	errUnderflow := errors.New("rpn stack underflow")
+
+	%%{
+		action mark { mark = p }
+		action push {
+			x, convErr := strconv.Atoi(data[mark:p])
+			if convErr != nil { return 0, convErr }
+			st.push(x)
+		}
+		action add {
+			if st.count < 2 { return 0, errUnderflow }
+			y, x := st.pop(), st.pop()
+			st.push(x + y)
+		}
+		action sub {
+			if st.count < 2 { return 0, errUnderflow }
+			y, x := st.pop(), st.pop()
+			st.push(x - y)
+		}
+		action mul {
+			if st.count < 2 { return 0, errUnderflow }
+			y, x := st.pop(), st.pop()
+			st.push(x * y)
+		}
+		action div {
+			if st.count < 2 { return 0, errUnderflow }
+			y, x := st.pop(), st.pop()
+			if y == 0 { return 0, errors.New("rpn division by zero") }
+			st.push(x / y)
+		}
+		action abs {
+			if st.count < 1 { return 0, errUnderflow }
+			st.push(abs(st.pop()))
+		}
+		action abba { st.push(666) }
+
+		stuff  = digit+ >mark %push
+		       | '+' @add
+		       | '-' @sub
+		       | '*' @mul
+		       | '/' @div
+		       | 'abs' %abs
+		       | 'add' %add
+		       | 'abba' %abba
+		       ;
+
+		main := ( space | stuff space )* ;
+
+		write init;
+		write exec;
+	}%%
+
+	if cs < rpn_first_final {
+		if p == pe {
+			return 0, errors.New("unexpected eof")
+		}
+		return 0, fmt.Errorf("error at position %d", p)
+	}
+
+	if st.count == 0 {
+		return 0, errors.New("rpn stack empty on result")
+	}
+
+	return st.pop(), nil
+}
+
+//////////////////////////////////////////////////////////////////////
+
+type rpnTest struct {
+ s string
+ v int
+}
+
+var rpnTests = []rpnTest{
+ rpnTest{"666\n", 666},
+ rpnTest{"666 111\n", 111},
+ rpnTest{"4 3 add\n", 7},
+ rpnTest{"4 3 +\n", 7},
+ rpnTest{"4 3 -\n", 1},
+ rpnTest{"4 3 *\n", 12},
+ rpnTest{"6 2 /\n", 3},
+ rpnTest{"0 3 -\n", -3},
+ rpnTest{"0 3 - abs\n", 3},
+ rpnTest{" 2 2 + 3 - \n", 1},
+ rpnTest{"10 7 3 2 * - +\n", 11},
+ rpnTest{"abba abba add\n", 1332},
+}
+
+type rpnFailTest struct {
+ s string
+ e string
+}
+
+var rpnFailTests = []rpnFailTest{
+ rpnFailTest{"\n", "rpn stack empty on result"},
+}
+
+// main runs the positive and negative test tables and exits with status 1
+// if any case in either table fails, 0 otherwise.
+func main() {
+	rc := 0
+
+	for _, test := range rpnTests {
+		res, err := rpn(test.s)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "FAIL rpn(%#v) %s\n", test.s, err)
+			rc = 1
+		} else if res != test.v {
+			fmt.Fprintf(os.Stderr, "FAIL rpn(%#v) -> %#v != %#v\n",
+				test.s, res, test.v)
+			rc = 1
+		}
+	}
+
+	// Inputs that must be rejected. A missing or wrong error is a failure
+	// too and has to be reflected in the exit status.
+	for _, test := range rpnFailTests {
+		res, err := rpn(test.s)
+		if err == nil {
+			fmt.Fprintf(os.Stderr, "FAIL rpn(%#v) -> %#v should fail: %#v\n",
+				test.s, res, test.e)
+			rc = 1
+		} else if err.Error() != test.e {
+			fmt.Fprintf(os.Stderr, "FAIL rpn(%#v) %#v should be %#v\n",
+				test.s, err.Error(), test.e)
+			rc = 1
+		}
+	}
+
+	os.Exit(rc)
+}
diff --git a/examples/go/url.rl b/examples/go/url.rl
new file mode 100644
index 00000000..e94d59c6
--- /dev/null
+++ b/examples/go/url.rl
@@ -0,0 +1,414 @@
+// -*-go-*-
+//
+// URL Parser
+// Copyright (c) 2010 J.A. Roberts Tunney
+// MIT License
+//
+// To compile:
+//
+// ragel -Z -T0 -o url.go url.rl
+// ragel -Z -T0 -o url_authority.go url_authority.rl
+// go build -o url url.go url_authority.go
+// ./url
+//
+// To show a diagram of your state machine:
+//
+// ragel -V -Z -p -o url.dot url.rl
+// xdot url.dot
+//
+// ragel -V -Z -p -o url_authority.dot url_authority.rl
+// xdot url_authority.dot
+//
+// Reference:
+//
+// - http://tools.ietf.org/html/rfc3986
+//
+
+package main
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "time"
+)
+
+// URL holds the components of a parsed absolute URL.
+type URL struct {
+ Scheme string // http, sip, file, etc. (never blank, always lowercase)
+ User string // user name from the authority (unescaped), blank if absent
+ Pass string // password from the authority (unescaped), blank if absent
+ Host string // IP 4/6 address or hostname (blank when there is no authority, e.g. file:///etc/passwd)
+ Port int // like 80 or 5060 (default 0)
+ Params string // stuff after ';' (NOT UNESCAPED, used in sip)
+ Path string // stuff starting with '/' (unescaped)
+ Query string // stuff after '?' (NOT UNESCAPED)
+ Fragment string // stuff after '#' (unescaped)
+}
+
+%% machine url;
+%% write data;
+
+// i parse absolute urls and don't suck at it. i'll parse just about
+// any type of url you can think of and give you a human-friendly data
+// structure.
+//
+// this routine takes no more than a few microseconds, is reentrant,
+// performs in a predictable manner (for security/soft-realtime,)
+// doesn't modify your `data` buffer, and under no circumstances will
+// it panic (i hope!)
+//
+// on success the scheme is lowercased, path and fragment are unescaped and
+// the query is kept exactly as written. on failure url is nil and err is
+// non-nil.
+func URLParse(data []byte) (url *URL, err error) {
+ cs, p, pe, eof := 0, 0, len(data), len(data)
+ mark := 0
+ url = new(URL)
+
+ // this buffer is so we can unescape while we roll
+ var hex byte
+ buf := make([]byte, len(data))
+ amt := 0
+
+ %%{
+ action mark { mark = p }
+ action str_start { amt = 0 }
+ action str_char { buf[amt] = fc; amt++ }
+ action str_lower { buf[amt] = fc + 0x20; amt++ }
+ action hex_hi { hex = unhex(fc) * 16 }
+ action hex_lo { hex += unhex(fc)
+ buf[amt] = hex; amt++ }
+ action scheme { url.Scheme = string(buf[0:amt]) }
+ action authority { err = url.parseAuthority(data[mark:p])
+ if err != nil { return nil, err } }
+ action path { url.Path = string(buf[0:amt]) }
+ action query { url.Query = string(data[mark:p]) }
+ action fragment { url.Fragment = string(buf[0:amt]) }
+
+ # # do this instead if you *actually* use URNs (lol)
+ # action authority { url.Authority = string(data[mark:p]) }
+
+ # define what a single character is allowed to be
+ toxic = ( cntrl | 127 ) ;
+ scary = ( toxic | " " | "\"" | "#" | "%" | "<" | ">" ) ;
+ schmchars = ( lower | digit | "+" | "-" | "." ) ;
+ authchars = any -- ( scary | "/" | "?" | "#" ) ;
+ pathchars = any -- ( scary | "?" | "#" ) ;
+ querchars = any -- ( scary | "#" ) ;
+ fragchars = any -- ( scary ) ;
+
+ # define how characters trigger actions
+ escape = "%" xdigit xdigit ;
+ unescape = "%" ( xdigit @hex_hi ) ( xdigit @hex_lo ) ;
+ schmfirst = ( upper @str_lower ) | ( lower @str_char ) ;
+ schmchar = ( upper @str_lower ) | ( schmchars @str_char ) ;
+ authchar = escape | authchars ;
+ pathchar = unescape | ( pathchars @str_char ) ;
+ querchar = escape | querchars ;
+ fragchar = unescape | ( fragchars @str_char ) ;
+
+ # define multi-character patterns
+ scheme = ( schmfirst schmchar* ) >str_start %scheme ;
+ authority = authchar+ >mark %authority ;
+ path = ( ( "/" @str_char ) pathchar* ) >str_start %path ;
+ query = "?" ( querchar* >mark %query ) ;
+ fragment = "#" ( fragchar* >str_start %fragment ) ;
+ url = scheme ":" "//"? authority path? query? fragment?
+ | scheme ":" "//" authority? path? query? fragment?
+ ;
+
+ main := url;
+ write init;
+ write exec;
+ }%%
+
+ // stopping outside a final state means the input was rejected: either it
+ // ended too early (p reached pe) or the byte at p was not acceptable.
+ if cs < url_first_final {
+ if p == pe {
+ return nil, errors.New(
+ fmt.Sprintf("unexpected eof: %s", data))
+ } else {
+ return nil, errors.New(
+ fmt.Sprintf("error in url at pos %d: %s", p, data))
+ }
+ }
+
+ return url, nil
+}
+
+// unhex returns the value (0-15) of the hexadecimal digit b, accepting both
+// letter cases, or 0 when b is not a hexadecimal digit.
+func unhex(b byte) byte {
+	if b >= '0' && b <= '9' {
+		return b - '0'
+	}
+	if b >= 'a' && b <= 'f' {
+		return 10 + (b - 'a')
+	}
+	if b >= 'A' && b <= 'F' {
+		return 10 + (b - 'A')
+	}
+	return 0
+}
+
+//////////////////////////////////////////////////////////////////////
+
+type urlTest struct {
+ s []byte
+ url URL
+}
+
+var urlTests = []urlTest{
+
+ urlTest{
+ []byte("http://user:pass@example.com:80;hello/lol.php?fun#omg"),
+ URL{
+ Scheme: "http",
+ User: "user",
+ Pass: "pass",
+ Host: "example.com",
+ Port: 80,
+ Params: "hello",
+ Path: "/lol.php",
+ Query: "fun",
+ Fragment: "omg",
+ },
+ },
+
+ urlTest{
+ []byte("a:b"),
+ URL{
+ Scheme: "a",
+ Host: "b",
+ },
+ },
+
+ urlTest{
+ []byte("GoPHeR://@example.com@:;/?#"),
+ URL{
+ Scheme: "gopher",
+ Host: "@example.com@",
+ Path: "/",
+ },
+ },
+
+ urlTest{
+ []byte("ldap://[2001:db8::7]/c=GB?objectClass/?one"),
+ URL{
+ Scheme: "ldap",
+ Host: "2001:db8::7",
+ Path: "/c=GB",
+ Query: "objectClass/?one",
+ },
+ },
+
+ urlTest{
+ []byte("http://user@example.com"),
+ URL{
+ Scheme: "http",
+ User: "user",
+ Host: "example.com",
+ },
+ },
+
+ urlTest{
+ []byte("http://品研发和研发管@☃.com:65000;%20"),
+ URL{
+ Scheme: "http",
+ User: "品研发和研发管",
+ Host: "☃.com",
+ Port: 65000,
+ Params: "%20",
+ },
+ },
+
+ urlTest{
+ []byte("https://example.com:80"),
+ URL{
+ Scheme: "https",
+ Host: "example.com",
+ Port: 80,
+ },
+ },
+
+ urlTest{
+ []byte("file:///etc/passwd"),
+ URL{
+ Scheme: "file",
+ Path: "/etc/passwd",
+ },
+ },
+
+ urlTest{
+ []byte("file:///c:/WINDOWS/clock.avi"),
+ URL{
+ Scheme: "file",
+ Path: "/c:/WINDOWS/clock.avi", // <-- is this kosher?
+ },
+ },
+
+ urlTest{
+ []byte("file://hostname/path/to/the%20file.txt"),
+ URL{
+ Scheme: "file",
+ Host: "hostname",
+ Path: "/path/to/the file.txt",
+ },
+ },
+
+ urlTest{
+ []byte("sip:example.com"),
+ URL{
+ Scheme: "sip",
+ Host: "example.com",
+ },
+ },
+
+ urlTest{
+ []byte("sip:example.com:5060"),
+ URL{
+ Scheme: "sip",
+ Host: "example.com",
+ Port: 5060,
+ },
+ },
+
+ urlTest{
+ []byte("mailto:ditto@pokémon.com"),
+ URL{
+ Scheme: "mailto",
+ User: "ditto",
+ Host: "pokémon.com",
+ },
+ },
+
+ urlTest{
+ []byte("sip:[dead:beef::666]:5060"),
+ URL{
+ Scheme: "sip",
+ Host: "dead:beef::666",
+ Port: 5060,
+ },
+ },
+
+ urlTest{
+ []byte("tel:+12126660420"),
+ URL{
+ Scheme: "tel",
+ Host: "+12126660420",
+ },
+ },
+
+ urlTest{
+ []byte("sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg"),
+ URL{
+ Scheme: "sip",
+ User: "bob barker",
+ Pass: "priceisright",
+ Host: "dead:beef::666",
+ Port: 5060,
+ Params: "isup-oli=00",
+ Path: "/palfun.html",
+ Query: "haha",
+ Fragment: "omg",
+ },
+ },
+
+ urlTest{
+ []byte("http://www.google.com/search?%68l=en&safe=off&q=omfg&aq=f&aqi=g2g-s1g1g-s1g5&aql=&oq=&gs_rfai="),
+ URL{
+ Scheme: "http",
+ Host: "www.google.com",
+ Path: "/search",
+ Query: "%68l=en&safe=off&q=omfg&aq=f&aqi=g2g-s1g1g-s1g5&aql=&oq=&gs_rfai=",
+ },
+ },
+
+}
+
+// compare reports whether url matches the components expected by the test
+// case. Each differing field is reported on stderr exactly once, and all
+// fields are checked so a single run shows every mismatch.
+func (test *urlTest) compare(url *URL) (passed bool) {
+	passed = true
+
+	// mismatch prints one FAIL line for a field and marks the case as failed.
+	mismatch := func(field string, got, want interface{}) {
+		fmt.Fprintf(os.Stderr, "FAIL url(%#v) %s: %#v != %#v\n",
+			string(test.s), field, got, want)
+		passed = false
+	}
+
+	if url.Scheme != test.url.Scheme {
+		mismatch("scheme", url.Scheme, test.url.Scheme)
+	}
+	if url.User != test.url.User {
+		mismatch("user", url.User, test.url.User)
+	}
+	if url.Pass != test.url.Pass {
+		mismatch("pass", url.Pass, test.url.Pass)
+	}
+	if url.Host != test.url.Host {
+		mismatch("host", url.Host, test.url.Host)
+	}
+	if url.Port != test.url.Port {
+		mismatch("port", url.Port, test.url.Port)
+	}
+	if url.Params != test.url.Params {
+		mismatch("params", url.Params, test.url.Params)
+	}
+	if url.Path != test.url.Path {
+		mismatch("path", url.Path, test.url.Path)
+	}
+	if url.Query != test.url.Query {
+		mismatch("query", url.Query, test.url.Query)
+	}
+	if url.Fragment != test.url.Fragment {
+		mismatch("fragment", url.Fragment, test.url.Fragment)
+	}
+	return passed
+}
+
+// bench times URLParse on a few sample inputs and prints, for each one, the
+// total elapsed time divided by the number of rounds, in nanoseconds.
+func bench() {
+	const rounds = 10000
+	inputs := [][]byte{
+		[]byte("a:a"),
+		[]byte("http://google.com/"),
+		[]byte("sip:jtunney@lobstertech.com"),
+		[]byte("http://user:pass@example.com:80;hello/lol.php?fun#omg"),
+		[]byte("file:///etc/passwd"),
+	}
+	for _, input := range inputs {
+		start := time.Now()
+		for n := rounds; n > 0; n-- {
+			URLParse(input)
+		}
+		elapsed := time.Now().Sub(start)
+		fmt.Printf("BENCH URLParse(%s) -> %d ns\n", input, elapsed.Nanoseconds()/rounds)
+	}
+}
+
+// test parses every entry of urlTests and returns 1 if any entry fails to
+// parse or parses to unexpected components, 0 otherwise.
+func test() (rc int) {
+	for _, tc := range urlTests {
+		parsed, err := URLParse(tc.s)
+		switch {
+		case err != nil:
+			fmt.Fprintf(os.Stderr, "FAIL url(%#v) %s\n", string(tc.s), err)
+			rc = 1
+		case !tc.compare(parsed):
+			rc = 1
+		}
+	}
+	return rc
+}
+
+// main runs the correctness tests and, only when they all pass, the
+// benchmark. The process exit status is the test result.
+func main() {
+	status := test()
+	if status != 0 {
+		os.Exit(status)
+	}
+	bench()
+	os.Exit(status)
+}
diff --git a/examples/go/url_authority.rl b/examples/go/url_authority.rl
new file mode 100644
index 00000000..3e651ad0
--- /dev/null
+++ b/examples/go/url_authority.rl
@@ -0,0 +1,165 @@
+// -*-go-*-
+//
+// URL Parser
+// Copyright (c) 2010 J.A. Roberts Tunney
+// MIT License
+//
+
+package main
+
+import (
+ "errors"
+ "fmt"
+ "strconv"
+)
+
+%% machine url_authority;
+%% write data;
+
+// i parse strings like `alice@pokémon.com`.
+//
+// sounds simple right? but i also parse stuff like:
+//
+// bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00
+//
+// which in actual reality is:
+//
+// - User: "bob barker"
+// - Pass: "priceisright"
+// - Host: "dead:beef::666"
+// - Port: 5060
+// - Params: "isup-oli=00"
+//
+// which was probably extracted from an absolute url that looked like:
+//
+// sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg
+//
+// which was probably extracted from its address form:
+//
+// "Bob Barker" <sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg>;tag=666
+//
+// who would have thought this could be so hard ._.
+func (url *URL) parseAuthority(data []byte) (err error) {
+ cs, p, pe, eof := 0, 0, len(data), len(data)
+ mark := 0
+
+ // temporary holding place for user:pass and/or host:port cuz an
+ // optional term (user[:pass]) coming before a mandatory term
+ // (host[:port]) would require backtracking and all that
+ // evil nondeterministic stuff which ragel seems to hate. (for
+ // this same reason you're also allowed to use square brackets
+ // around the username.)
+ var b1, b2 string
+
+ // this buffer is so we can unescape while we roll
+ var hex byte
+ buf := make([]byte, len(data))
+ amt := 0
+
+ %%{
+ action mark { mark = p }
+ action str_start { amt = 0 }
+ action str_char { buf[amt] = fc; amt++ }
+ action hex_hi { hex = unhex(fc) * 16 }
+ action hex_lo { hex += unhex(fc)
+ buf[amt] = hex; amt++ }
+ action copy_b1 { b1 = string(buf[0:amt]); amt = 0 }
+ action copy_b2 { b2 = string(buf[0:amt]); amt = 0 }
+ action copy_host { url.Host = string(b1); amt = 0 }
+
+ action copy_port {
+ if b2 != "" {
+ url.Port, err = strconv.Atoi(string(b2))
+ if err != nil { goto fail }
+ if url.Port > 65535 { goto fail }
+ }
+ }
+
+ action params {
+ url.Params = string(data[mark:p])
+ }
+
+ action params_eof {
+ url.Params = string(data[mark:p])
+ return nil
+ }
+
+ action atsymbol {
+ url.User = string(b1)
+ url.Pass = string(b2)
+ b2 = ""
+ }
+
+ action alldone {
+ url.Host = string(b1)
+ if url.Host == "" {
+ url.Host = string(buf[0:amt])
+ } else {
+ if amt > 0 {
+ b2 = string(buf[0:amt])
+ }
+ if b2 != "" {
+ url.Port, err = strconv.Atoi(string(b2))
+ if err != nil { goto fail }
+ if url.Port > 65535 { goto fail }
+ }
+ }
+ return nil
+ }
+
+ # define what a single character is allowed to be
+ toxic = ( cntrl | 127 ) ;
+ scary = ( toxic | space | "\"" | "#" | "%" | "<" | ">" ) ;
+ authdelims = ( "/" | "?" | "#" | ":" | "@" | ";" | "[" | "]" ) ;
+ userchars = any -- ( authdelims | scary ) ;
+ userchars_esc = userchars | ":" ;
+ passchars = userchars ;
+ hostchars = passchars | "@" ;
+ hostchars_esc = hostchars | ":" ;
+ portchars = digit ;
+ paramchars = hostchars | ":" | ";" ;
+
+ # define how characters trigger actions
+ escape = "%" xdigit xdigit ;
+ unescape = "%" ( xdigit @hex_hi ) ( xdigit @hex_lo ) ;
+ userchar = unescape | ( userchars @str_char ) ;
+ userchar_esc = unescape | ( userchars_esc @str_char ) ;
+ passchar = unescape | ( passchars @str_char ) ;
+ hostchar = unescape | ( hostchars @str_char ) ;
+ hostchar_esc = unescape | ( hostchars_esc @str_char ) ;
+ portchar = unescape | ( portchars @str_char ) ;
+ paramchar = escape | paramchars ;
+
+ # define multi-character patterns
+ user_plain = userchar+ >str_start %copy_b1 ;
+ user_quoted = "[" ( userchar_esc+ >str_start %copy_b1 ) "]" ;
+ user = ( user_quoted | user_plain ) %/alldone ;
+ pass = passchar+ >str_start %copy_b2 %/alldone ;
+ host_plain = hostchar+ >str_start %copy_b1 %copy_host ;
+ host_quoted = "[" ( hostchar_esc+ >str_start %copy_b1 %copy_host ) "]" ;
+ host = ( host_quoted | host_plain ) %/alldone ;
+ port = portchar* >str_start %copy_b2 %copy_port %/alldone ;
+ params = ";" ( paramchar* >mark %params %/params_eof ) ;
+ userpass = user ( ":" pass )? ;
+ hostport = host ( ":" port )? ;
+ authority = ( userpass ( "@" @atsymbol ) )? hostport params? ;
+
+ main := authority;
+ write init;
+ write exec;
+ }%%
+
+ // if cs >= url_authority_first_final {
+ // return nil
+ // }
+
+ // success is reported by the eof actions above (alldone / params_eof),
+ // which return nil; falling through to here is treated as a failure.
+fail:
+ // fmt.Println("error state", cs)
+ // fmt.Println(string(data))
+ // for i := 0; i < p; i++ {
+ // fmt.Print(" ")
+ // }
+ // fmt.Println("^")
+ // fmt.Println(url)
+ return errors.New(fmt.Sprintf("bad url authority: %#v", string(data)))
+}
diff --git a/examples/gotocallret.rl b/examples/gotocallret.rl
new file mode 100644
index 00000000..32c01a2c
--- /dev/null
+++ b/examples/gotocallret.rl
@@ -0,0 +1,96 @@
+/*
+ * Demonstrate the use of goto, call and return. This machine expects either a
+ * lower case char or a digit as a command then a space followed by the command
+ * arg. If the command is a char, then the arg must be a string of chars.
+ * If the command is a digit, then the arg must be a string of digits. This
+ * choice is determined by action code, rather than through transition
+ * destinations.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+/* State for the GotoCallRet machine plus its driver entry points. */
+struct GotoCallRet
+{
+ /* The command character most recently seen; set by the machine's actions. */
+ char comm;
+ /* cs is the current machine state. top and stack are presumably the call
+  * stack the generated code uses for fcall/fret -- confirm against the
+  * Ragel manual. */
+ int cs, top, stack[32];
+
+ int init( );
+ int execute( const char *data, int len, bool isEof );
+ int finish( );
+};
+
+%%{
+ machine GotoCallRet;
+
+ # Error machine, consumes to end of
+ # line, then starts the main line over.
+ garble_line := (
+ (any-'\n')*'\n'
+ ) >{cout << "error: garbling line" << endl;} @{fgoto main;};
+
+ # Look for a string of alphas or of digits,
+ # on anything else, hold the character and return.
+ alp_comm := alpha+ $!{fhold;fret;};
+ dig_comm := digit+ $!{fhold;fret;};
+
+ # Choose which machine to call into based on the command.
+ action comm_arg {
+ if ( comm >= 'a' )
+ fcall alp_comm;
+ else
+ fcall dig_comm;
+ }
+
+ # Specifies command string. Note that the arg is left out.
+ command = (
+ [a-z0-9] @{comm = fc;} ' ' @comm_arg '\n'
+ ) @{cout << "correct command" << endl;};
+
+ # Any number of commands. If there is an
+ # error anywhere, garble the line.
+ main := command* $!{fhold;fgoto garble_line;};
+}%%
+
+%% write data;
+
+/* Initialise the machine; the Ragel-generated setup code replaces the
+ * "write init" directive. Always returns 1. */
+int GotoCallRet::init( )
+{
+ %% write init;
+ return 1;
+}
+
+/* Run the machine over len bytes at data. isEof marks the final call, in
+ * which case eof is set to pe. Returns -1 if the machine is in its error
+ * state afterwards, 1 if it is in a final state, 0 otherwise. */
+int GotoCallRet::execute( const char *data, int len, bool isEof )
+{
+ const char *p = data;
+ const char *pe = data + len;
+ const char *eof = isEof ? pe : 0;
+
+ %% write exec;
+ if ( cs == GotoCallRet_error )
+ return -1;
+ if ( cs >= GotoCallRet_first_final )
+ return 1;
+ return 0;
+}
+
+#define BUFSIZE 1024
+
+/* Feed standard input to the machine one line at a time, then signal end of
+ * input with an empty final chunk and report whether parsing succeeded. */
+int main()
+{
+	GotoCallRet gcr;
+	char buf[BUFSIZE];
+
+	gcr.init();
+	for ( ;; ) {
+		char *line = fgets( buf, sizeof(buf), stdin );
+		if ( line == 0 )
+			break;
+		gcr.execute( buf, strlen(buf), false );
+	}
+
+	gcr.execute( 0, 0, true );
+	if ( gcr.cs < GotoCallRet_first_final )
+		cerr << "gotocallret: error: parsing input" << endl;
+	return 0;
+}
diff --git a/examples/mailbox.rl b/examples/mailbox.rl
new file mode 100644
index 00000000..94590fdd
--- /dev/null
+++ b/examples/mailbox.rl
@@ -0,0 +1,207 @@
+/*
+ * Parses unix mail boxes into headers and bodies.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+#define BUFSIZE 2048
+
+/* A growable buffer for collecting headers. */
+struct Buffer
+{
+	Buffer()
+		: data(0), allocated(0), length(0)
+	{ }
+
+	~Buffer()
+	{
+		empty();
+	}
+
+	/* Add one character at the end. When the storage is exhausted it is
+	 * resized to twice the new length before the character is stored. */
+	void append( char p )
+	{
+		length += 1;
+		if ( length > allocated )
+			upAllocate( length*2 );
+		data[length-1] = p;
+	}
+
+	/* Forget the contents but keep the storage for reuse. */
+	void clear()
+	{
+		length = 0;
+	}
+
+	void upAllocate( int len );
+	void empty();
+
+	char *data;      /* storage, null until the first allocation */
+	int allocated;   /* size of the storage in bytes */
+	int length;      /* number of bytes currently in use */
+};
+
+
+/* State for the mailbox machine plus its driver entry points. */
+struct MailboxScanner
+{
+ /* Collect the name and the contents of the header being scanned. */
+ Buffer headName;
+ Buffer headContent;
+
+ /* cs is the current machine state. top and stack are presumably the call
+  * stack used by fcall/fret in the header sub-machines -- confirm against
+  * the Ragel manual. */
+ int cs, top, stack[1];
+
+ int init( );
+ int execute( const char *data, int len, bool isEof );
+ int finish( );
+};
+
+%%{
+ machine MailboxScanner;
+
+ # Buffer the header names.
+ action bufHeadName { headName.append(fc); }
+
+ # Prints a blank line after the end of the headers of each message.
+ action blankLine { cout << endl; }
+
+ # Helpers we will use in matching the date section of the from line.
+ day = /[A-Z][a-z][a-z]/;
+ month = /[A-Z][a-z][a-z]/;
+ year = /[0-9][0-9][0-9][0-9]/;
+ time = /[0-9][0-9]:[0-9][0-9]/ . ( /:[0-9][0-9]/ | '' );
+ letterZone = /[A-Z][A-Z][A-Z]/;
+ numZone = /[+\-][0-9][0-9][0-9][0-9]/;
+ zone = letterZone | numZone;
+ dayNum = /[0-9 ][0-9]/;
+
+ # These are the different formats of the date minus an obscure
+ # type that has a funny string 'remote from xxx' on the end. Taken
+ # from c-client in the imap-2000 distribution.
+ date = day . ' ' . month . ' ' . dayNum . ' ' . time . ' ' .
+ ( year | year . ' ' . zone | zone . ' ' . year );
+
+ # From lines separate messages. We will exclude fromLine from a message
+ # body line. This will cause us to stay in message line up until an
+ # entirely correct from line is matched.
+ fromLine = 'From ' . (any-'\n')* . ' ' . date . '\n';
+
+ # The types of characters that can be used as a header name.
+ hchar = print - [ :];
+
+ # Simply eat up an uninteresting header. Return at the first non-ws
+ # character following a newline.
+ consumeHeader := (
+ [^\n] |
+ '\n' [ \t] |
+ '\n' [^ \t] @{fhold; fret;}
+ )*;
+
+ action hchar {headContent.append(fc);}
+ action hspace {headContent.append(' ');}
+
+ action hfinish {
+ headContent.append(0);
+ cout << headContent.data << endl;
+ headContent.clear();
+ fhold;
+ fret;
+ }
+
+ # Display the contents of a header as it is consumed. Collapses line
+ # continuations to a single space.
+ printHeader := (
+ [^\n] @hchar |
+ ( '\n' ( [ \t]+ '\n' )* [ \t]+ ) %hspace
+ )** $!hfinish;
+
+ action onHeader
+ {
+ headName.append(0);
+ if ( strcmp( headName.data, "From" ) == 0 ||
+ strcmp( headName.data, "To" ) == 0 ||
+ strcmp( headName.data, "Subject" ) == 0 )
+ {
+ /* Print the header name, then jump to a machine that will display
+ * the contents. */
+ cout << headName.data << ":";
+ headName.clear();
+ fcall printHeader;
+ }
+
+ headName.clear();
+ fcall consumeHeader;
+ }
+
+ header = hchar+ $bufHeadName ':' @onHeader;
+
+ # Exclude fromLine from a messageLine, otherwise when encountering a
+ # fromLine we will be simultaneously matching the old message and a new
+ # message.
+ messageLine = ( [^\n]* '\n' - fromLine );
+
+ # An entire message.
+ message = ( fromLine . header* . '\n' @blankLine . messageLine* );
+
+ # File is a series of messages.
+ main := message*;
+}%%
+
+%% write data;
+
+/* Initialise the machine; the Ragel-generated setup code replaces the
+ * "write init" directive. Always returns 1. */
+int MailboxScanner::init( )
+{
+ %% write init;
+ return 1;
+}
+
+/* Run the machine over len bytes at data. isEof marks the final chunk, in
+ * which case eof is set to pe. Returns -1 if the machine is in its error
+ * state afterwards, 1 if it is in a final state, 0 otherwise. */
+int MailboxScanner::execute( const char *data, int len, bool isEof )
+{
+ const char *p = data;
+ const char *pe = data + len;
+ const char *eof = isEof ? pe : 0;
+
+ %% write exec;
+
+ if ( cs == MailboxScanner_error )
+ return -1;
+ if ( cs >= MailboxScanner_first_final )
+ return 1;
+ return 0;
+}
+
+int MailboxScanner::finish( )
+{
+ if ( cs == MailboxScanner_error )
+ return -1;
+ if ( cs >= MailboxScanner_first_final )
+ return 1;
+ return 0;
+}
+
+
+/* Release the storage, if any was allocated, and reset the bookkeeping.
+ * Does nothing when no storage is held. */
+void Buffer::empty()
+{
+	if ( data == 0 )
+		return;
+
+	free( data );
+	data = 0;
+	allocated = 0;
+	length = 0;
+}
+
+/* Resize the storage to len bytes, keeping the existing contents. If the
+ * allocation fails a message is written to cerr and the program is aborted,
+ * rather than continuing with a null data pointer. */
+void Buffer::upAllocate( int len )
+{
+	/* realloc on a null pointer behaves like malloc, so one call covers both
+	 * the first allocation and later growth. The old pointer is kept until
+	 * the call succeeds so a failure cannot overwrite it. */
+	char *grown = (char*) realloc( data, len );
+	if ( grown == 0 ) {
+		cerr << "mailbox: out of memory" << endl;
+		abort();
+	}
+
+	data = grown;
+	allocated = len;
+}
+
+MailboxScanner mailbox;
+char buf[BUFSIZE];
+
+/* Feed standard input through the mailbox machine one buffer at a time. A
+ * short read marks the last chunk. Reports an error when the machine does
+ * not end in a final state. */
+int main()
+{
+	bool last = false;
+
+	mailbox.init();
+	while ( !last ) {
+		int got = fread( buf, 1, BUFSIZE, stdin );
+		last = ( got != BUFSIZE );
+		mailbox.execute( buf, got, last );
+	}
+
+	if ( mailbox.finish() <= 0 )
+		cerr << "mailbox: error parsing input" << endl;
+	return 0;
+}
diff --git a/examples/params.rl b/examples/params.rl
new file mode 100644
index 00000000..a8ffeae9
--- /dev/null
+++ b/examples/params.rl
@@ -0,0 +1,102 @@
+/*
+ * Parse command line arguments.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#define BUFLEN 1024
+
+/* State of the argument-parsing machine. */
+struct params
+{
+ /* Collects the string argument of the option being parsed; one byte more
+  * than BUFLEN is reserved. */
+ char buffer[BUFLEN+1];
+ /* Number of bytes currently stored in buffer. */
+ int buflen;
+ /* Current machine state. */
+ int cs;
+};
+
+%%{
+	machine params;
+	access fsm->;
+
+	# A buffer to collect arguments
+
+	# Append to the buffer. Characters beyond BUFLEN are dropped.
+	action append {
+		if ( fsm->buflen < BUFLEN )
+			fsm->buffer[fsm->buflen++] = fc;
+	}
+
+	# Terminate a buffer. The buffer holds BUFLEN+1 bytes, so the
+	# terminator must also be written when BUFLEN characters have been
+	# appended; otherwise a long argument would be printed without a
+	# terminating NUL.
+	action term {
+		if ( fsm->buflen <= BUFLEN )
+			fsm->buffer[fsm->buflen++] = 0;
+	}
+
+	# Clear out the buffer
+	action clear { fsm->buflen = 0; }
+
+	action help { printf("help\n"); }
+	action version { printf("version\n"); }
+	action output { printf("output: \"%s\"\n", fsm->buffer); }
+	action spec { printf("spec: \"%s\"\n", fsm->buffer); }
+	action mach { printf("machine: \"%s\"\n", fsm->buffer); }
+
+	# Helpers that collect strings
+	string = [^\0]+ >clear $append %term;
+
+	# Different arguments. Each argument ends with its NUL terminator.
+	help = ( '-h' | '-H' | '-?' | '--help' ) 0 @help;
+	version = ( '-v' | '--version' ) 0 @version;
+	output = '-o' 0? string 0 @output;
+	spec = '-S' 0? string 0 @spec;
+	mach = '-M' 0? string 0 @mach;
+
+	main := (
+		help |
+		version |
+		output |
+		spec |
+		mach
+	)*;
+}%%
+
+%% write data;
+
+/* Prepare a params machine for a fresh run: the collection buffer is emptied
+ * and the Ragel-generated setup code replaces the "write init" directive. */
+void params_init( struct params *fsm )
+{
+ fsm->buflen = 0;
+ %% write init;
+}
+
+/* Run the machine over len bytes at data. main passes each argument
+ * together with its terminating NUL, which the machine uses as the
+ * argument separator. */
+void params_execute( struct params *fsm, const char *data, int len )
+{
+ const char *p = data;
+ const char *pe = data + len;
+
+ %% write exec;
+}
+
+int params_finish( struct params *fsm )
+{
+ if ( fsm->cs == params_error )
+ return -1;
+ if ( fsm->cs >= params_first_final )
+ return 1;
+ return 0;
+}
+
+#define BUFSIZE 2048
+
+/* Run every command line argument (including its terminating NUL) through
+ * the params machine and report an error unless it ends in a final state. */
+int main( int argc, char **argv )
+{
+	struct params params;
+	int a = 1;
+
+	params_init( &params );
+	while ( a < argc ) {
+		const char *arg = argv[a];
+		params_execute( &params, arg, strlen(arg)+1 );
+		a += 1;
+	}
+
+	if ( params_finish( &params ) != 1 )
+		fprintf( stderr, "params: error processing arguments\n" );
+
+	return 0;
+}
diff --git a/examples/pullscan.rl b/examples/pullscan.rl
new file mode 100644
index 00000000..d9e8a579
--- /dev/null
+++ b/examples/pullscan.rl
@@ -0,0 +1,170 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define BUFSIZE 4096
+
+typedef struct _Scanner {
+ /* Scanner state. */
+ int cs; /* Current machine state; scan() compares it with Scanner_error. */
+ int act; /* Used by the Ragel-generated scanner code (presumably the id of the last matched pattern). */
+ int have; /* Bytes of an unfinished token kept at the start of buf across a refill. */
+ int curline; /* Set to 1 by scan_init; not updated anywhere in the visible code. */
+ char *ts; /* Start of the token being matched; 0 when no token is in progress. */
+ char *te; /* End of the token being matched. */
+ char *p; /* Read cursor into buf. */
+ char *pe; /* One past the last valid byte in buf. */
+ char *eof; /* Set to 0 by scan_init and never changed in the visible code. */
+ FILE *file; /* Input stream that scan() reads with fread. */
+ int done; /* Nonzero once fread has returned fewer bytes than requested. */
+
+ /* Token data */
+ char *data; /* Start of the most recent token (copied from ts by ret_tok). */
+ int len; /* Length of the most recent token, computed as p - data. */
+ int value; /* Not used in the visible code. */
+
+ char buf[BUFSIZE]; /* Input buffer that ts, te, p and pe point into. */
+} Scanner;
+
+
+%%{
+ machine Scanner;
+ write data;
+}%%
+
+void scan_init( Scanner *s, FILE *file ) /* Prepare s to pull tokens from file. */
+{
+ memset (s, '\0', sizeof(Scanner)); /* Zero the whole structure; p and pe start equal, so the first scan() call refills the buffer. */
+ s->curline = 1;
+ s->file = file;
+ s->eof = 0; /* Already zero after the memset; kept explicit. */
+ %% write init;
+}
+
+#define TK_NO_TOKEN (-1)
+#define TK_ERR 128
+#define TK_EOF 129
+#define TK_Identifier 130
+#define TK_Number 131
+#define TK_String 132
+
+#define ret_tok( _tok ) token = _tok; s->data = s->ts
+
+int scan( Scanner *s ) /* Return the next token id read from s->file, refilling s->buf as needed; s->data and s->len describe the token text. Returns TK_ERR on a machine error or when a token does not fit in the buffer. */
+{
+ int token = TK_NO_TOKEN;
+ int space, readlen;
+
+ while ( 1 ) {
+ if ( s->p == s->pe ) { /* All buffered input has been consumed. */
+ printf("scanner: need more data\n");
+
+ if ( s->ts == 0 ) /* No token in progress: the whole buffer can be reused. */
+ s->have = 0;
+ else {
+ /* There is data that needs to be shifted over. */
+ printf("scanner: buffer broken mid token\n");
+ s->have = s->pe - s->ts;
+ memmove( s->buf, s->ts, s->have );
+ s->te -= (s->ts-s->buf); /* Rebase te by the same distance ts is about to move. */
+ s->ts = s->buf;
+ }
+
+ s->p = s->buf + s->have; /* New input goes right after the preserved prefix. */
+ space = BUFSIZE - s->have;
+
+ if ( space == 0 ) {
+ /* We filled up the buffer trying to scan a token. */
+ printf("scanner: out of buffer space\n");
+ return TK_ERR;
+ }
+
+ if ( s->done ) {
+ printf("scanner: end of file\n");
+ s->p[0] = 0; /* Feed a single 0 byte; the 0 pattern in the machine below reports it as TK_EOF. */
+ readlen = 1;
+ }
+ else {
+ readlen = fread( s->p, 1, space, s->file );
+ if ( readlen < space ) /* Short read: treated as the end of the input. */
+ s->done = 1;
+ }
+
+ s->pe = s->p + readlen;
+ }
+
+ %%{
+ machine Scanner;
+ access s->;
+ variable p s->p;
+ variable pe s->pe;
+ variable eof s->eof;
+
+ main := |*
+
+ # Identifiers
+ ( [a-zA-Z_] [a-zA-Z0-9_]* ) =>
+ { ret_tok( TK_Identifier ); fbreak; };
+
+ # Whitespace
+ [ \t\n];
+
+ '"' ( [^\\"] | '\\' any ) * '"' =>
+ { ret_tok( TK_String ); fbreak; };
+
+ # Number
+ digit+ =>
+ { ret_tok( TK_Number ); fbreak; };
+
+ # EOF
+ 0 =>
+ { ret_tok( TK_EOF ); fbreak; };
+
+ # Anything else
+ any =>
+ { ret_tok( *s->p ); fbreak; };
+
+ *|;
+
+ write exec;
+ }%%
+
+ if ( s->cs == Scanner_error )
+ return TK_ERR;
+
+ if ( token != TK_NO_TOKEN ) { /* An action above recorded a token through ret_tok. */
+ s->len = s->p - s->data;
+ return token;
+ }
+ }
+}
+
+
+int main (int argc, char** argv)
+{
+ Scanner ss;
+ int tok;
+
+ scan_init(&ss, stdin);
+
+ while ( 1 ) {
+ tok = scan (&ss);
+ if ( tok == TK_EOF ) {
+ printf ("parser: EOF\n");
+ break;
+ }
+ else if ( tok == TK_ERR ) {
+ printf ("parser: ERR\n");
+ break;
+ }
+ else {
+ printf ("parser: %d \"", tok);
+ fwrite ( ss.data, 1, ss.len, stdout );
+ printf ("\"\n" );
+ }
+ }
+
+ return 0;
+}
+
+
diff --git a/examples/rlscan.rl b/examples/rlscan.rl
new file mode 100644
index 00000000..d4d4bf97
--- /dev/null
+++ b/examples/rlscan.rl
@@ -0,0 +1,300 @@
+/*
+ * Lexes Ragel input files.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+/* Print a 0-terminated string to cout, writing <, > and & as the XML
+ * entities &lt;, &gt; and &amp;. */
+void escapeXML( char *data )
+{
+	for ( const char *cur = data; *cur != 0; ++cur ) {
+		const char ch = *cur;
+		if ( ch == '<' ) cout << "&lt;";
+		else if ( ch == '>' ) cout << "&gt;";
+		else if ( ch == '&' ) cout << "&amp;";
+		else cout << ch;
+	}
+}
+
+/* Print one character to cout; <, > and & become XML entities. */
+void escapeXML( char c )
+{
+	if ( c == '<' ) cout << "&lt;";
+	else if ( c == '>' ) cout << "&gt;";
+	else if ( c == '&' ) cout << "&amp;";
+	else
+		cout << c;
+}
+
+/*
+ * Print exactly len bytes starting at data to cout with <, > and &
+ * written as XML entities. The range is not treated as 0-terminated:
+ * embedded 0 bytes are printed like any other character.
+ */
+void escapeXML( char *data, int len )
+{
+	char *const stop = data + len;
+	while ( data != stop )
+		escapeXML( *data++ );
+}
+
+inline void write( const char *data ) /* Write a 0-terminated string to cout without XML escaping. */
+{
+ cout << data;
+}
+
+inline void write( char c ) /* Write a single character to cout without XML escaping. */
+{
+ cout << c;
+}
+
+inline void write( char *data, int len ) /* Write exactly len bytes from data to cout without XML escaping. */
+{
+ cout.write( data, len );
+}
+
+
+%%{
+ machine RagelScan;
+
+ word = [a-zA-Z_][a-zA-Z_0-9]*;
+ integer = [0-9]+;
+ hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*;
+
+ default = ^0;
+ EOF = 0;
+
+ # Handles comments in outside code and inline blocks.
+ c_comment :=
+ ( default* :>> '*/' )
+ ${ escapeXML( fc ); }
+ @{ fret; };
+
+ action emit {
+ escapeXML( ts, te-ts );
+ }
+
+ #
+ # Inline action code
+ #
+
+ ilscan := |*
+
+ "'" ( [^'\\] | /\\./ )* "'" => emit;
+ '"' ( [^"\\] | /\\./ )* '"' => emit;
+ '/*' {
+ write( "/*" );
+ fcall c_comment;
+ };
+ '//' [^\n]* '\n' => emit;
+
+ '{' {
+ write( '{' );
+ inline_depth += 1;
+ };
+
+ '}' {
+ write( '}' );
+ /* If dropping down to the last } then return
+ * to ragel code. */
+ if ( --inline_depth == 0 ) {
+ write( "</inline>\n" );
+ fgoto rlscan;
+ }
+ };
+
+ default => { escapeXML( *ts ); };
+ *|;
+
+ #
+ # Ragel Tokens
+ #
+
+ rlscan := |*
+ '}%%' {
+ if ( !single_line ) {
+ write( "</section>\n" );
+ fgoto main;
+ }
+ };
+
+ '\n' {
+ if ( single_line ) {
+ write( "</section>\n" );
+ fgoto main;
+ }
+ };
+
+ # Word
+ word {
+ write( "<word>" );
+ write( ts, te-ts );
+ write( "</word>\n" );
+ };
+
+ # Decimal integer.
+ integer {
+ write( "<int>" );
+ write( ts, te-ts );
+ write( "</int>\n" );
+ };
+
+ # Hexadecimal integer.
+ hex {
+ write( "<hex>" );
+ write( ts, te-ts );
+ write( "</hex>\n" );
+ };
+
+ # Consume comments.
+ '#' [^\n]* '\n';
+
+ # Single literal string.
+ "'" ( [^'\\] | /\\./ )* "'" {
+ write( "<single_lit>" );
+ escapeXML( ts, te-ts );
+ write( "</single_lit>\n" );
+ };
+
+ # Double literal string.
+ '"' ( [^"\\] | /\\./ )* '"' {
+ write( "<double_lit>" );
+ escapeXML( ts, te-ts );
+ write( "</double_lit>\n" );
+ };
+
+ # Or literal.
+ '[' ( [^\]\\] | /\\./ )* ']' {
+ write( "<or_lit>" );
+ escapeXML( ts, te-ts );
+ write( "</or_lit>\n" );
+ };
+
+ # Regex Literal.
+ '/' ( [^/\\] | /\\./ ) * '/' {
+ write( "<re_lit>" );
+ escapeXML( ts, te-ts );
+ write( "</re_lit>\n" );
+ };
+
+ # Open an inline block
+ '{' {
+ inline_depth = 1;
+ write( "<inline>{" );
+ fgoto ilscan;
+ };
+
+ punct {
+ write( "<symbol>" );
+ escapeXML( fc );
+ write( "</symbol>\n" );
+ };
+
+ default;
+ *|;
+
+ #
+ # Outside code.
+ #
+
+ main := |*
+
+ "'" ( [^'\\] | /\\./ )* "'" => emit;
+ '"' ( [^"\\] | /\\./ )* '"' => emit;
+
+ '/*' {
+ escapeXML( ts, te-ts );
+ fcall c_comment;
+ };
+
+ '//' [^\n]* '\n' => emit;
+
+ '%%{' {
+ write( "<section>\n" );
+ single_line = false;
+ fgoto rlscan;
+ };
+
+ '%%' {
+ write( "<section>\n" );
+ single_line = true;
+ fgoto rlscan;
+ };
+
+ default {
+ escapeXML( *ts );
+ };
+
+ # EOF.
+ EOF;
+ *|;
+}%%
+
+%% write data nofinal;
+
+#define BUFSIZE 2048
+
+int main() /* Read stdin in blocks of up to BUFSIZE bytes, run the RagelScan machine over each block, and write the marked-up result to cout. */
+{
+ std::ios::sync_with_stdio(false);
+
+ int cs, act; /* Machine state variables used by the generated code. */
+ char *ts, *te; /* Start and end of the token currently being matched. */
+ int stack[1], top; /* Call stack for the fcall into c_comment and its fret; one slot. */
+
+ static char inbuf[BUFSIZE];
+ bool single_line = false; /* True while scanning a single-line section, false for a multi-line one. */
+ int inline_depth = 0; /* Brace nesting depth while scanning an inline code block. */
+
+ %% write init;
+
+ bool done = false;
+ int have = 0; /* Bytes of an unfinished token carried over at the start of inbuf. */
+ while ( !done ) {
+ /* How much space is in the buffer? */
+ int space = BUFSIZE - have;
+ if ( space == 0 ) {
+ /* Buffer is full. */
+ cerr << "TOKEN TOO BIG" << endl;
+ exit(1);
+ }
+
+ /* Read in a block. */
+ char *p = inbuf + have; /* New input is placed after the preserved prefix. */
+ cin.read( p, space );
+ int len = cin.gcount();
+ char *pe = p + len;
+ char *eof = 0;
+
+ /* Check for EOF. */
+ if ( len == 0 ) {
+ eof = pe; /* Signals to the generated code that no more input follows. */
+ done = true;
+ }
+
+ %% write exec;
+
+ if ( cs == RagelScan_error ) {
+ /* Machine failed before finding a token. */
+ cerr << "PARSE ERROR" << endl;
+ exit(1);
+ }
+
+ if ( ts == 0 ) /* No token in progress: nothing to carry over. */
+ have = 0;
+ else {
+ /* There is a prefix to preserve, shift it over. */
+ have = pe - ts;
+ memmove( inbuf, ts, have );
+ te = inbuf + (te-ts); /* Keep te at the same offset from the moved ts. */
+ ts = inbuf;
+ }
+ }
+ return 0;
+}
diff --git a/examples/statechart.rl b/examples/statechart.rl
new file mode 100644
index 00000000..a04471b5
--- /dev/null
+++ b/examples/statechart.rl
@@ -0,0 +1,116 @@
+/*
+ * Demonstrate the use of labels, the epsilon operator, and the join operator
+ * for creating machines using the named state and transition list paradigm.
+ * This implements the same machine as the atoi example.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+struct StateChart /* Parser object for the statechart-style atoi machine. */
+{
+ bool neg; /* Set when a leading minus sign was seen. */
+ int val; /* Value accumulated from the digits; negated at the end of a number when neg is set. */
+ int cs; /* Current machine state. */
+
+ int init( ); /* Reset the parser; always returns 1. */
+ int execute( const char *data, int len ); /* Parse len bytes; returns -1 on error, 1 in a final state, 0 otherwise. */
+ int finish( ); /* Same -1/1/0 classification of the current state. */
+};
+
+%%{
+ machine StateChart;
+
+ action begin {
+ neg = false;
+ val = 0;
+ }
+
+ action see_neg {
+ neg = true;
+ }
+
+ action add_digit {
+ val = val * 10 + (fc - '0');
+ }
+
+ action finish {
+ if ( neg )
+ val = -1 * val;
+ }
+
+ atoi = (
+ start: (
+ '-' @see_neg ->om_num |
+ '+' ->om_num |
+ [0-9] @add_digit ->more_nums
+ ),
+
+ # One or more nums.
+ om_num: (
+ [0-9] @add_digit ->more_nums
+ ),
+
+ # Zero or more nums.
+ more_nums: (
+ [0-9] @add_digit ->more_nums |
+ '' -> final
+ )
+ ) >begin %finish;
+
+ main := ( atoi '\n' @{ cout << val << endl; } )*;
+}%%
+
+%% write data;
+
+int StateChart::init( ) /* Reset the sign flag, the accumulated value and the machine state; always returns 1. */
+{
+	neg = false;
+	val = 0; /* val is an int, so reset it with an integer literal. */
+	%% write init;
+	return 1;
+}
+
+int StateChart::execute( const char *data, int len ) /* Run the machine over the len bytes at data; returns -1 on error, 1 if the machine is in a final state, 0 otherwise. */
+{
+ const char *p = data; /* Read cursor consumed by the generated code. */
+ const char *pe = data + len; /* One past the last byte to process. */
+
+ %% write exec;
+
+ if ( cs == StateChart_error )
+ return -1;
+ if ( cs >= StateChart_first_final )
+ return 1;
+ return 0;
+}
+
+/* Classify the current machine state without consuming input:
+ * -1 error, 1 final (accepting), 0 otherwise. */
+int StateChart::finish( )
+{
+	if ( cs == StateChart_error )
+		return -1;
+	return ( cs >= StateChart_first_final ) ? 1 : 0;
+}
+
+
+#define BUFSIZE 1024
+
+/* Parse stdin line by line; report a failed parse on cerr. */
+int main()
+{
+	StateChart parser;
+	char line[BUFSIZE];
+
+	parser.init();
+	while ( fgets( line, sizeof(line), stdin ) )
+		parser.execute( line, strlen(line) );
+	if ( !( parser.finish() > 0 ) )
+		cerr << "statechart: error: parsing input" << endl;
+	return 0;
+}
diff --git a/examples/uri.rl b/examples/uri.rl
new file mode 100644
index 00000000..185a76c6
--- /dev/null
+++ b/examples/uri.rl
@@ -0,0 +1,31 @@
+%%{
+ machine uri;
+
+ action scheme {}
+ action loc {}
+ action item {}
+ action query {}
+ action last {}
+ action nothing {}
+
+ main :=
+ # Scheme machine. This is ambiguous with the item machine. We commit
+ # to the scheme machine on colon.
+ ( [^:/?#]+ ':' @(colon,1) @scheme )?
+
+ # Location machine. This is ambiguous with the item machine. We remain
+ # ambiguous until a second slash, at that point and all points after
+ # we place a higher priority on staying in the location machine over
+ # moving into the item machine.
+ ( ( '/' ( '/' [^/?#]* ) $(loc,1) ) %loc %/loc )?
+
+ # Item machine. Ambiguous with both scheme and location, which both
+ # get a higher priority on the characters causing ambiguity.
+ ( ( [^?#]+ ) $(loc,0) $(colon,0) %item %/item )?
+
+ # Last two components, the characters that initiate these machines are
+ # not supported in any previous components, therefore there are no
+ # ambiguities introduced by these parts.
+ ( '?' [^#]* %query %/query)?
+ ( '#' any* %/last )?;
+}%%