diff options
author | Adrian Thurston <thurston@complang.org> | 2007-01-21 22:58:22 +0000 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2007-01-21 22:58:22 +0000 |
commit | 86214ecf32c2bafd549952c93eab006d20327736 (patch) | |
tree | 9b5449ef42e829f98bf7a6c6e0554b88d4ab9132 /examples | |
download | ragel-86214ecf32c2bafd549952c93eab006d20327736.tar.gz |
Import from my private repository. Snapshot after version 5.16, immediately
following the rewrite of the parsers. Repository revision number 3961.
Diffstat (limited to 'examples')
30 files changed, 2554 insertions, 0 deletions
diff --git a/examples/Makefile b/examples/Makefile new file mode 100644 index 00000000..e1e78089 --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,37 @@ +# +# Copyright 2002-2003 Adrian Thurston <thurston@cs.queensu.ca> +# + +# This file is part of Ragel. +# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +SUBDIRS = \ + atoi awkemu clang concurrent format gotocallret mailbox params rlscan \ + statechart cppscan + +all: + @for dir in $(SUBDIRS); do cd $$dir; $(MAKE) || exit 1; cd ..; done + +ps: + @for dir in $(SUBDIRS); do cd $$dir; $(MAKE) ps || exit 1; cd ..; done + +clean: + @for dir in $(SUBDIRS); do cd $$dir; $(MAKE) clean || exit 1; cd ..; done + +distclean: + @for dir in $(SUBDIRS); do cd $$dir; $(MAKE) distclean || exit 1; cd ..; done + diff --git a/examples/README b/examples/README new file mode 100644 index 00000000..12773cb3 --- /dev/null +++ b/examples/README @@ -0,0 +1,40 @@ + + Ragel State Machine Compiler -- Examples + ======================================== + +atoi -- Converts a string to an integer. + +awkemu -- Performs the basic parsing that the awk program performs on input. + The awk equivalent to awkemu is in awkemu/awkequiv.awk + +clang -- A scanner for a simple C like language. It breaks input up into + words, numbers, strings and symbols and strips out whitespace + and comments. 
It is a suitable template for writing a parser + that finds a sequence of tokens. + +concurrent -- Demonstrates the ability of ragel to produce parsers that + perform independent tasks concurrently. + +cppscan -- A C++ scanner that uses the longest match scanning method. This + example differs from other examples of scanning. Each run of the + state machine matches one token. This method results in a + smaller state machine since the final kleene star is omitted and + therefore every state does not need to get all the transitions + of the start state. + +format -- Partial printf implementation. + +gotocallret -- Demonstrate the use of fgoto, fcall and fret. + +mailbox -- Parses unix mailbox files. It breaks files into messages, and + messages into headers and body. It demonstrates Ragel's ability + to make parsers for structured file formats. + +params -- Parses command line arguments. + +rlscan -- Lexes Ragel input files. + +statechart -- Demonstrate the use of labels, the epsilon operator, and the + join operator for creating machines using the named state and + transition list paradigm. This implements the same machine as + the atoi example. diff --git a/examples/atoi/Makefile b/examples/atoi/Makefile new file mode 100644 index 00000000..901de19a --- /dev/null +++ b/examples/atoi/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: atoi + +ps: atoi.ps + +atoi: atoi.o + g++ -g -o atoi atoi.o + +atoi.cpp: atoi.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) atoi.rl | $(RLCODEGEN) -G2 -o atoi.cpp + +atoi.o: atoi.cpp + g++ -Wall -g -c -O3 -o $@ $< + +atoi.ps: atoi.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) atoi.rl | $(RLCODEGEN) -V | dot -Tps > atoi.ps + +distclean clean: + rm -Rf *.o atoi.cpp atoi atoi.ps diff --git a/examples/atoi/atoi.rl b/examples/atoi/atoi.rl new file mode 100644 index 00000000..0d354a00 --- /dev/null +++ b/examples/atoi/atoi.rl @@ -0,0 +1,60 @@ +/* + * Convert a string to an integer. 
+ */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +%%{ + machine atoi; + write data noerror; +}%% + +int atoi( char *str ) +{ + char *p = str; + int cs, val = 0; + bool neg = false;; + + %%{ + action see_neg { + neg = true; + } + + action add_digit { + val = val * 10 + (fc - '0'); + } + + main := + ( '-'@see_neg | '+' )? ( digit @add_digit )+ + '\n' @{ fbreak; }; + + # Initialize and execute. + write init; + write exec noend; + }%% + + if ( neg ) + val = -1 * val; + + if ( cs < atoi_first_final ) + cerr << "atoi: there was an error" << endl; + + return val; +}; + + +#define BUFSIZE 1024 + +int main() +{ + char buf[BUFSIZE]; + while ( fgets( buf, sizeof(buf), stdin ) != 0 ) { + int value = atoi( buf ); + cout << value << endl; + } + return 0; +} diff --git a/examples/awkemu/Makefile b/examples/awkemu/Makefile new file mode 100644 index 00000000..5e6ecde4 --- /dev/null +++ b/examples/awkemu/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: awkemu + +ps: awkemu.ps + +awkemu: awkemu.o + gcc -g -o awkemu awkemu.o + +awkemu.c: awkemu.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) awkemu.rl | $(RLCODEGEN) -G2 -o awkemu.c + +awkemu.ps: awkemu.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) awkemu.rl | $(RLCODEGEN) -V | dot -Tps > awkemu.ps + +%.o: %.c + gcc -pedantic -Wall -g -c -O3 -o $@ $< + +distclean clean: + rm -Rf *.o awkemu.c awkemu awkemu.ps diff --git a/examples/awkemu/awkemu.rl b/examples/awkemu/awkemu.rl new file mode 100644 index 00000000..6615943d --- /dev/null +++ b/examples/awkemu/awkemu.rl @@ -0,0 +1,116 @@ +/* + * Perform the basic line parsing of input performed by awk. 
+ */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +%%{ + machine awkemu; + + action start_word { + ws[nwords] = fpc; + } + + action end_word { + we[nwords++] = fpc; + } + + action start_line { + nwords = 0; + ls = fpc; + } + + action end_line { + printf("endline(%i): ", nwords ); + fwrite( ls, 1, p - ls, stdout ); + printf("\n"); + + for ( i = 0; i < nwords; i++ ) { + printf(" word: "); + fwrite( ws[i], 1, we[i] - ws[i], stdout ); + printf("\n"); + } + } + + # Words in a line. + word = ^[ \t\n]+; + + # The whitespace separating words in a line. + whitespace = [ \t]; + + # The components in a line to break up. Either a word or a single char of + # whitespace. On the word capture characters. + blineElements = word >start_word %end_word | whitespace; + + # Star the break line elements. Just be careful to decrement the leaving + # priority as we don't want multiple character identifiers to be treated as + # multiple single char identifiers. + line = ( blineElements** '\n' ) >start_line @end_line; + + # Any number of lines. + main := line*; +}%% + +%% write data noerror nofinal; + +#define MAXWORDS 256 +#define BUFSIZE 4096 +char buf[BUFSIZE]; + +int main() +{ + int i, nwords = 0; + char *ls = 0; + char *ws[MAXWORDS]; + char *we[MAXWORDS]; + + int cs; + int have = 0; + + %% write init; + + while ( 1 ) { + char *p, *pe, *data = buf + have; + int len, space = BUFSIZE - have; + /* fprintf( stderr, "space: %i\n", space ); */ + + if ( space == 0 ) { + fprintf(stderr, "buffer out of space\n"); + exit(1); + } + + len = fread( data, 1, space, stdin ); + /* fprintf( stderr, "len: %i\n", len ); */ + if ( len == 0 ) + break; + + /* Find the last newline by searching backwards. This is where + * we will stop processing on this iteration. 
* The bound on pe is tested before pe is dereferenced, so a buffer holding no newline never causes a read of buf[-1]. */ + p = buf; + pe = buf + have + len - 1; + while ( pe >= buf && *pe != '\n' ) + pe--; + pe += 1; + + /* fprintf( stderr, "running on: %i\n", pe - p ); */ + + %% write exec; + + /* How much is still in the buffer. */ + have = data + len - pe; + if ( have > 0 ) + memmove( buf, pe, have ); + + /* fprintf(stderr, "have: %i\n", have ); */ + + if ( len < space ) + break; + } + + if ( have > 0 ) + fprintf(stderr, "input not newline terminated\n"); + return 0; +} diff --git a/examples/awkemu/awkequiv.awk b/examples/awkemu/awkequiv.awk new file mode 100755 index 00000000..9877dd36 --- /dev/null +++ b/examples/awkemu/awkequiv.awk @@ -0,0 +1,10 @@ +#!/usr/bin/awk -f +# + + +{ + print "endline(" NF "): " $0 + for ( i = 1; i <= NF; i++ ) { + print " word: " $i + } +} diff --git a/examples/clang/Makefile b/examples/clang/Makefile new file mode 100644 index 00000000..d3054060 --- /dev/null +++ b/examples/clang/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: clang + +ps: clang.ps + +clang: clang.o + gcc -g -o clang clang.o + +clang.c: clang.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) clang.rl | $(RLCODEGEN) -G2 -o clang.c + +clang.ps: clang.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) clang.rl | $(RLCODEGEN) -V | dot -Tps > clang.ps + +%.o: %.c + gcc -pedantic -Wall -O3 -g -c -o $@ $< + +distclean clean: + rm -Rf *.o clang.c clang clang.ps diff --git a/examples/clang/clang.rl b/examples/clang/clang.rl new file mode 100644 index 00000000..7ecfeefd --- /dev/null +++ b/examples/clang/clang.rl @@ -0,0 +1,150 @@ +/* + * A mini C-like language scanner. + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +%%{ + machine clang; + + newline = '\n' @{curline += 1;}; + any_count_line = any | newline; + + # Consume a C comment. + c_comment := any_count_line* :>> '*/' @{fgoto main;}; + + main := |* + + # Alpha numeric characters or underscore. + alnum_u = alnum | '_'; + + # Alpha characters or underscore. 
+ alpha_u = alpha | '_'; + + # Symbols. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving dump the symbol. + ( punct - [_'"] ) { + printf( "symbol(%i): %c\n", curline, tokstart[0] ); + }; + + # Identifier. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving, dump the identifier. + alpha_u alnum_u* { + printf( "ident(%i): ", curline ); + fwrite( tokstart, 1, tokend-tokstart, stdout ); + printf("\n"); + }; + + # Single Quote. + sliteralChar = [^'\\] | newline | ( '\\' . any_count_line ); + '\'' . sliteralChar* . '\'' { + printf( "single_lit(%i): ", curline ); + fwrite( tokstart, 1, tokend-tokstart, stdout ); + printf("\n"); + }; + + # Double Quote. + dliteralChar = [^"\\] | newline | ( '\\' any_count_line ); + '"' . dliteralChar* . '"' { + printf( "double_lit(%i): ", curline ); + fwrite( tokstart, 1, tokend-tokstart, stdout ); + printf("\n"); + }; + + # Whitespace is standard ws, newlines and control codes. + any_count_line - 0x21..0x7e; + + # Describe both c style comments and c++ style comments. The + # priority bump on the terminator of the comments brings us + # out of the extend* which matches everything. + '//' [^\n]* newline; + + '/*' { fgoto c_comment; }; + + # Match an integer. We don't bother clearing the buf or filling it. + # The float machine overlaps with int and it will do it. + digit+ { + printf( "int(%i): ", curline ); + fwrite( tokstart, 1, tokend-tokstart, stdout ); + printf("\n"); + }; + + # Match a float. Upon entering the machine clear the buf, buffer + # characters on every trans and dump the float upon leaving. + digit+ '.' digit+ { + printf( "float(%i): ", curline ); + fwrite( tokstart, 1, tokend-tokstart, stdout ); + printf("\n"); + }; + + # Match a hex. Upon entering the hex part, clear the buf, buffer characters + # on every trans and dump the hex on leaving transitions. 
+ '0x' xdigit+ { + printf( "hex(%i): ", curline ); + fwrite( tokstart, 1, tokend-tokstart, stdout ); + printf("\n"); + }; + + *|; +}%% + +%% write data nofinal; + +#define BUFSIZE 128 + +void scanner() +{ + static char buf[BUFSIZE]; + int cs, act, have = 0, curline = 1; + char *tokstart, *tokend = 0; + int done = 0; + + %% write init; + + while ( !done ) { + char *p = buf + have, *pe; + int len, space = BUFSIZE - have; + + if ( space == 0 ) { + /* We've used up the entire buffer storing an already-parsed token + * prefix that must be preserved. */ + fprintf(stderr, "OUT OF BUFFER SPACE\n" ); + exit(1); + } + + len = fread( p, 1, space, stdin ); + + /* If this is the last buffer, tack on an EOF. */ + if ( len < space ) { + p[len++] = 0; + done = 1; + } + + pe = p + len; + %% write exec; + + if ( cs == clang_error ) { + fprintf(stderr, "PARSE ERROR\n" ); + break; + } + + if ( tokstart == 0 ) + have = 0; + else { + /* There is a prefix to preserve, shift it over. */ + have = pe - tokstart; + memmove( buf, tokstart, have ); + tokend = buf + (tokend-tokstart); + tokstart = buf; + } + } +} + +int main() +{ + scanner(); + return 0; +} + diff --git a/examples/concurrent/Makefile b/examples/concurrent/Makefile new file mode 100644 index 00000000..b9a09f6e --- /dev/null +++ b/examples/concurrent/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: concurrent + +ps: concurrent.ps + +concurrent: concurrent.o + g++ -g -o concurrent concurrent.o + +concurrent.cpp: concurrent.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) concurrent.rl | $(RLCODEGEN) -G2 -o concurrent.cpp + +concurrent.ps: concurrent.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) concurrent.rl | $(RLCODEGEN) -V | dot -Tps > concurrent.ps + +%.o: %.cpp + g++ -Wall -g -c -O3 -o $@ $< + +distclean clean: + rm -Rf *.o concurrent.cpp concurrent concurrent.ps diff --git a/examples/concurrent/concurrent.rl b/examples/concurrent/concurrent.rl new file mode 100644 index 00000000..b70fd5df --- 
/dev/null +++ b/examples/concurrent/concurrent.rl @@ -0,0 +1,126 @@ +/* + * Show off concurrent abilities. + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +#define BUFSIZE 2048 + +struct Concurrent +{ + int cur_char; + int start_word; + int start_comment; + int start_literal; + + int cs; + + int init( ); + int execute( const char *data, int len ); + int finish( ); +}; + +%%{ + machine Concurrent; + + action next_char { + cur_char += 1; + } + + action start_word { + start_word = cur_char; + } + action end_word { + cout << "word: " << start_word << + " " << cur_char-1 << endl; + } + + action start_comment { + start_comment = cur_char; + } + action end_comment { + cout << "comment: " << start_comment << + " " << cur_char-1 << endl; + } + + action start_literal { + start_literal = cur_char; + } + action end_literal { + cout << "literal: " << start_literal << + " " << cur_char-1 << endl; + } + + # Count characters. + chars = ( any @next_char )*; + + # Words are non-whitespace. + word = ( any-space )+ >start_word %end_word; + words = ( ( word | space ) $1 %0 )*; + + # Finds C style comments. + comment = ( '/*' any* :>> '*/' ) >start_comment %end_comment; + comments = ( comment | any )**; + + # Finds single quoted strings. + literalChar = ( any - ['\\] ) | ( '\\' . 
any ); + literal = ('\'' literalChar* '\'' ) >start_literal %end_literal; + literals = ( ( literal | (any-'\'') ) $1 %0 )*; + + main := chars | words | comments | literals; +}%% + +%% write data; + +int Concurrent::init( ) +{ + %% write init; + cur_char = 0; + return 1; +} + +int Concurrent::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; + + if ( cs == Concurrent_error ) + return -1; + if ( cs >= Concurrent_first_final ) + return 1; + return 0; +} + +int Concurrent::finish( ) +{ + %% write eof; + if ( cs == Concurrent_error ) + return -1; + if ( cs >= Concurrent_first_final ) + return 1; + return 0; +} + +Concurrent concurrent; +char buf[BUFSIZE]; + +int main() +{ + concurrent.init(); + while ( 1 ) { + int len = fread( buf, 1, BUFSIZE, stdin ); + concurrent.execute( buf, len ); + if ( len != BUFSIZE ) + break; + } + + if ( concurrent.finish() <= 0 ) + cerr << "concurrent: error parsing input" << endl; + return 0; +} diff --git a/examples/cppscan/Makefile b/examples/cppscan/Makefile new file mode 100644 index 00000000..6a92c82a --- /dev/null +++ b/examples/cppscan/Makefile @@ -0,0 +1,41 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen +FLEX = flex +RE2C = re2c + +CFLAGS = -Wall -g -O3 + +all: cppscan lex-cppscan re2c-cppscan + +ps: cppscan.ps + +cppscan: cppscan.o + g++ -g -o $@ $< + +lex-cppscan: lex-cppscan.o + g++ -g -o $@ $< + +re2c-cppscan: re2c-cppscan.o + g++ -g -o $@ $< + +cppscan.cpp: cppscan.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) cppscan.rl | $(RLCODEGEN) -G2 -o $@ + +lex-cppscan.cpp: cppscan.lex + $(FLEX) -f -o $@ $< + +re2c-cppscan.cpp: cppscan.rec + $(RE2C) -s $< > $@ + +example.cpp: example.rec + $(RE2C) -s $< > $@ + +%.o: %.cpp + g++ $(CFLAGS) -c -o $@ $< + +cppscan.ps: cppscan.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) cppscan.rl | $(RLCODEGEN) -V | dot -Tps > cppscan.ps + +distclean clean: + rm -Rf *.o cppscan.cpp cppscan cppscan.ps \ + lex-cppscan lex-cppscan.cpp 
re2c-cppscan re2c-cppscan.cpp diff --git a/examples/cppscan/cppscan.lex b/examples/cppscan/cppscan.lex new file mode 100644 index 00000000..fb662538 --- /dev/null +++ b/examples/cppscan/cppscan.lex @@ -0,0 +1,143 @@ +/* + * flex equivalent to cppscan.rl + */ + +%{ + +#include <stdio.h> + +#define TK_Dlit 256 +#define TK_Slit 257 +#define TK_Float 258 +#define TK_Id 259 +#define TK_NameSep 260 +#define TK_Arrow 261 +#define TK_PlusPlus 262 +#define TK_MinusMinus 263 +#define TK_ArrowStar 264 +#define TK_DotStar 265 +#define TK_ShiftLeft 266 +#define TK_ShiftRight 267 +#define TK_IntegerDecimal 268 +#define TK_IntegerOctal 269 +#define TK_IntegerHex 270 +#define TK_EqualsEquals 271 +#define TK_NotEquals 272 +#define TK_AndAnd 273 +#define TK_OrOr 274 +#define TK_MultAssign 275 +#define TK_DivAssign 276 +#define TK_PercentAssign 277 +#define TK_PlusAssign 278 +#define TK_MinusAssign 279 +#define TK_AmpAssign 280 +#define TK_CaretAssign 281 +#define TK_BarAssign 282 +#define TK_DotDotDot 283 +#define TK_Whitespace 284 +#define TK_Comment 285 + +int line = 1, col = 1; + +void token( int tok, char *data, int len ) +{ + printf( "<%i> ", tok ); + for ( int i = 0; i < len; i++ ) + fputc( data[i], stdout ); + fputc( '\n', stdout ); + + /* Count newlines and columns. This code is here mainly for having some + * code in the token routine when commenting out the above output during + * performance testing. */ + for ( int i = 0; i < len; i ++ ) { + if ( data[i] == '\n' ) { + line += 1; + col = 1; + } + else { + col += 1; + } + } +} + + +%} + +%x COMMENT + +FRACT_CONST [0-9]*\.[0-9]+|[0-9]+\. +EXPONENT [eE][+\-]?[0-9]+ +FLOAT_SUFFIX [flFL] + +%% + + /* Single and double literals. */ +L?\'([^\'\\\n]|\\.)*\' { + token( TK_Slit, yytext, yyleng ); +} + +L?\"([^\"\\\n]|\\.)*\" { + token( TK_Dlit, yytext, yyleng ); +} + +[a-zA-Z_][a-zA-Z0-9_]* { + token( TK_Id, yytext, yyleng ); +} + +{FRACT_CONST}{EXPONENT}?{FLOAT_SUFFIX}?|[0-9]+{EXPONENT}{FLOAT_SUFFIX}? 
{ + token( TK_Float, yytext, yyleng ); +} + +(0|[1-9][0-9]*)[ulUL]{0,3} { + token( TK_IntegerDecimal, yytext, yyleng ); +} + +0[0-9]+[ulUL]{0,2} { + token( TK_IntegerOctal, yytext, yyleng ); +} + +0x[0-9a-fA-F]+[ulUL]{0,2} { + token( TK_IntegerHex, yytext, yyleng ); +} + +:: token( TK_NameSep, yytext, yyleng ); +== token( TK_EqualsEquals, yytext, yyleng ); +!= token( TK_NotEquals, yytext, yyleng ); +&& token( TK_AndAnd, yytext, yyleng ); +\|\| token( TK_OrOr, yytext, yyleng ); +\*= token( TK_MultAssign, yytext, yyleng ); +\/= token( TK_DivAssign, yytext, yyleng ); +%= token( TK_PercentAssign, yytext, yyleng ); +\+= token( TK_PlusAssign, yytext, yyleng ); +-= token( TK_MinusAssign, yytext, yyleng ); +&= token( TK_AmpAssign, yytext, yyleng ); +^= token( TK_CaretAssign, yytext, yyleng ); +\|= token( TK_BarAssign, yytext, yyleng ); +\+\+ token( TK_PlusPlus, yytext, yyleng ); +-- token( TK_MinusMinus, yytext, yyleng ); +-> token( TK_Arrow, yytext, yyleng ); +->\* token( TK_ArrowStar, yytext, yyleng ); +\.\* token( TK_DotStar, yytext, yyleng ); +\.\.\. token( TK_DotDotDot, yytext, yyleng ); + +\/\* BEGIN(COMMENT); +<COMMENT>\*\/ BEGIN(INITIAL); +<COMMENT>(.|\n) { } + +\/\/.*\n {} +[^!-~]+ {} + +[!-/:-@\[-`{-~] token( yytext[0], yytext, yyleng ); + +%% + +int yywrap() +{ + /* Once the input is done, no more. 
*/ + return 1; +} + +int main() +{ + yylex(); +} diff --git a/examples/cppscan/cppscan.rec b/examples/cppscan/cppscan.rec new file mode 100644 index 00000000..43f297d8 --- /dev/null +++ b/examples/cppscan/cppscan.rec @@ -0,0 +1,183 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#define TK_Dlit 256 +#define TK_Slit 257 +#define TK_Float 258 +#define TK_Id 259 +#define TK_NameSep 260 +#define TK_Arrow 261 +#define TK_PlusPlus 262 +#define TK_MinusMinus 263 +#define TK_ArrowStar 264 +#define TK_DotStar 265 +#define TK_ShiftLeft 266 +#define TK_ShiftRight 267 +#define TK_IntegerDecimal 268 +#define TK_IntegerOctal 269 +#define TK_IntegerHex 270 +#define TK_EqualsEquals 271 +#define TK_NotEquals 272 +#define TK_AndAnd 273 +#define TK_OrOr 274 +#define TK_MultAssign 275 +#define TK_DivAssign 276 +#define TK_PercentAssign 277 +#define TK_PlusAssign 278 +#define TK_MinusAssign 279 +#define TK_AmpAssign 280 +#define TK_CaretAssign 281 +#define TK_BarAssign 282 +#define TK_DotDotDot 283 +#define TK_Whitespace 284 +#define TK_Comment 285 + +int line = 1, col = 1; + +void token( int tok, char *data, int len ) +{ + printf( "<%i> ", tok ); + for ( int i = 0; i < len; i++ ) + fputc( data[i], stdout ); + fputc( '\n', stdout ); + + /* Count newlines and columns. This code is here mainly for having some + * code in the token routine when commenting out the above output during + * performance testing. */ + for ( int i = 0; i < len; i ++ ) { + if ( data[i] == '\n' ) { + line += 1; + col = 1; + } + else { + col += 1; + } + } +} + +#define BUFSIZE 8192 +char buf[BUFSIZE]; + +void fill( int n ) +{ + printf("fill(%i)\n", n); + exit(1); +} + +int main() +{ + char *start, *p = buf, *lim = buf, *marker; + int len, have, want, shift; + int done = 0; + +#define YYCTYPE char + +#define YYCURSOR p +#define YYLIMIT lim +#define YYMARKER marker + +#define YYFILL(n) { \ + if ( ! 
done ) { \ + have = lim-start; \ + if ( start > buf ) { \ + shift = start-buf; \ + memmove( buf, start, have ); \ + start -= shift; \ + p -= shift; \ + lim -= shift; \ + marker -= shift; \ + } \ + want = BUFSIZE - have - 1; \ + len = fread( lim, 1, want, stdin ); \ + lim += len; \ + if ( len < want ) { \ + *lim++ = 0; \ + done = 1; \ + } \ + } \ + } + +again: + start = p; + +/*!re2c + +ANY = [\000-\377]; +FRACTCONST = ( [0-9]* "." [0-9]+ ) | [0-9]+ "."; +EXPONENT = [eE] [+\-]? [0-9]+; +FLOATSUFFIX = [flFL]; + + "L"? "\'" ( ANY \ [\'\\\n] | "\\" ANY )* "\'" { + token( TK_Slit, start, p-start ); + goto again; + } + + "L"? "\"" ( ANY \ [\"\\\n] | "\\" ANY )* "\"" { + token( TK_Dlit, start, p-start ); + goto again; + } + + [a-zA-Z_][a-zA-Z0-9_]* { + token( TK_Id, start, p-start ); + goto again; + } + + ( FRACTCONST EXPONENT? FLOATSUFFIX? ) | ( [0-9]+ EXPONENT FLOATSUFFIX? ) { + token( TK_Float, start, p-start ); + goto again; + } + + + ( "0" | [1-9][0-9]* ) [ulUL]* { + token( TK_IntegerDecimal, start, p-start ); + goto again; + } + + "0" [0-9]+ [ulUL]* { + token( TK_IntegerOctal, start, p-start ); + goto again; + } + + "0x" [0-9a-fA-F]+[ulUL]* { + token( TK_IntegerHex, start, p-start ); + goto again; + } + + "::" { token( TK_NameSep, start, p-start ); goto again; } + "==" { token( TK_EqualsEquals, start, p-start ); goto again; } + "!=" { token( TK_NotEquals, start, p-start ); goto again; } + "&&" { token( TK_AndAnd, start, p-start ); goto again; } + "||" { token( TK_OrOr, start, p-start ); goto again; } + "*=" { token( TK_MultAssign, start, p-start ); goto again; } + "/=" { token( TK_DivAssign, start, p-start ); goto again; } + "%=" { token( TK_PercentAssign, start, p-start ); goto again; } + "+=" { token( TK_PlusAssign, start, p-start ); goto again; } + "-=" { token( TK_MinusAssign, start, p-start ); goto again; } + "&=" { token( TK_AmpAssign, start, p-start ); goto again; } + "^=" { token( TK_CaretAssign, start, p-start ); goto again; } + "|=" { token( TK_BarAssign, 
start, p-start ); goto again; } + "++" { token( TK_PlusPlus, start, p-start ); goto again; } + "--" { token( TK_MinusMinus, start, p-start ); goto again; } + "->" { token( TK_Arrow, start, p-start ); goto again; } + "->*" { token( TK_ArrowStar, start, p-start ); goto again; } + ".*" { token( TK_DotStar, start, p-start ); goto again; } + "..." { token( TK_DotDotDot, start, p-start ); goto again; } + + "/*" { goto comment; } + "//" (ANY\"\n")* "\n" { goto again; } + [\001-\040\177]+ { goto again; } + + [\041-\057\072-\100\133-\140\173-\176] { + token( *start, start, p-start ); + goto again; + } + "\000" { return 0; } +*/ + +comment: +/*!re2c + "*/" { goto again; } + ANY { goto comment; } +*/ +} diff --git a/examples/cppscan/cppscan.rl b/examples/cppscan/cppscan.rl new file mode 100644 index 00000000..5c979ebe --- /dev/null +++ b/examples/cppscan/cppscan.rl @@ -0,0 +1,207 @@ +/* + * A C++ scanner. Uses the longest match construction. + * << <= <<= >> >= >>= are left out since angle brackets are used in templates. + */ + +#include <string.h> +#include <stdlib.h> +#include <iostream> + +#define TK_Dlit 256 +#define TK_Slit 257 +#define TK_Float 258 +#define TK_Id 259 +#define TK_NameSep 260 +#define TK_Arrow 261 +#define TK_PlusPlus 262 +#define TK_MinusMinus 263 +#define TK_ArrowStar 264 +#define TK_DotStar 265 +#define TK_ShiftLeft 266 +#define TK_ShiftRight 267 +#define TK_IntegerDecimal 268 +#define TK_IntegerOctal 269 +#define TK_IntegerHex 270 +#define TK_EqualsEquals 271 +#define TK_NotEquals 272 +#define TK_AndAnd 273 +#define TK_OrOr 274 +#define TK_MultAssign 275 +#define TK_DivAssign 276 +#define TK_PercentAssign 277 +#define TK_PlusAssign 278 +#define TK_MinusAssign 279 +#define TK_AmpAssign 280 +#define TK_CaretAssign 281 +#define TK_BarAssign 282 +#define TK_DotDotDot 283 +#define TK_Whitespace 284 +#define TK_Comment 285 + +#define BUFSIZE 16384 + +/* EOF char used to flush out that last token. This should be a whitespace + * token. 
*/ + +#define LAST_CHAR 0 + +using std::cerr; +using std::cout; +using std::cin; +using std::endl; + +static char buf[BUFSIZE]; +static int line = 1, col = 1; +static char *tokstart, *tokend; +static int act, have = 0; +static int cs; + +%%{ + machine Scanner; + write data nofinal; + + # Floating literals. + fract_const = digit* '.' digit+ | digit+ '.'; + exponent = [eE] [+\-]? digit+; + float_suffix = [flFL]; + + c_comment := + any* :>> '*/' + @{ fgoto main; }; + + main := |* + + # Single and double literals. + ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) + {token( TK_Slit );}; + ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) + {token( TK_Dlit );}; + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) + {token( TK_Id );}; + + # Floating literals. + ( fract_const exponent? float_suffix? | digit+ exponent float_suffix? ) + {token( TK_Float );}; + + # Integer decimal. Leading part buffered by float. + ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) + {token( TK_IntegerDecimal );}; + + # Integer octal. Leading part buffered by float. + ( '0' [0-9]+ [ulUL]{0,2} ) + {token( TK_IntegerOctal );}; + + # Integer hex. Leading 0 buffered by float. + ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) + {token( TK_IntegerHex );}; + + # Only buffer the second item, first buffered by symbol. */ + '::' {token( TK_NameSep );}; + '==' {token( TK_EqualsEquals );}; + '!=' {token( TK_NotEquals );}; + '&&' {token( TK_AndAnd );}; + '||' {token( TK_OrOr );}; + '*=' {token( TK_MultAssign );}; + '/=' {token( TK_DivAssign );}; + '%=' {token( TK_PercentAssign );}; + '+=' {token( TK_PlusAssign );}; + '-=' {token( TK_MinusAssign );}; + '&=' {token( TK_AmpAssign );}; + '^=' {token( TK_CaretAssign );}; + '|=' {token( TK_BarAssign );}; + '++' {token( TK_PlusPlus );}; + '--' {token( TK_MinusMinus );}; + '->' {token( TK_Arrow );}; + '->*' {token( TK_ArrowStar );}; + '.*' {token( TK_DotStar );}; + + # Three char compounds, first item already buffered. */ + '...' {token( TK_DotDotDot );}; + + # Single char symbols. 
+ ( punct - [_"'] ) {token( tokstart[0] );}; + + # Comments and whitespace. + '/*' { fgoto c_comment; }; + '//' [^\n]* '\n'; + ( any - 33..126 )+; + + *|; +}%% + +void token( int tok ) +{ + char *data = tokstart; + int len = tokend - tokstart; + + cout << '<' << tok << "> "; + cout.write( data, len ); + cout << '\n'; + + /* Count newlines and columns. This code is here mainly for having some + * code in the token routine when commenting out the above output during + * performance testing. */ + for ( int i = 0; i < len; i ++ ) { + if ( data[i] == '\n' ) { + line += 1; + col = 1; + } + else { + col += 1; + } + } +} + +int main() +{ + std::ios::sync_with_stdio(false); + + %% write init; + + /* Do the first read. */ + bool done = false; + while ( !done ) { + char *p = buf + have; + int space = BUFSIZE - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. */ + cerr << "OUT OF BUFFER SPACE" << endl; + exit(1); + } + + cin.read( p, space ); + int len = cin.gcount(); + + /* If we see eof then append the EOF char. */ + if ( len == 0 ) { + p[0] = LAST_CHAR, len++; + done = true; + } + + char *pe = p + len; + %% write exec; + + /* Check if we failed. */ + if ( cs == Scanner_error ) { + /* Machine failed before finding a token. */ + cerr << "PARSE ERROR" << endl; + exit(1); + } + + /* Now set up the prefix. */ + if ( tokstart == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. 
*/ + have = pe - tokstart; + memmove( buf, tokstart, have ); + tokend -= (tokstart-buf); + tokstart = buf; + } + } + + return 0; +} diff --git a/examples/format/Makefile b/examples/format/Makefile new file mode 100644 index 00000000..d5ac829b --- /dev/null +++ b/examples/format/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: format + +ps: format.ps + +format: format.o + gcc -g -o format format.o + +format.c: format.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) format.rl | $(RLCODEGEN) -G2 -o format.c + +format.ps: format.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) format.rl | $(RLCODEGEN) -V | dot -Tps > format.ps + +%.o: %.c + gcc -Wall -O3 -g -c -o $@ $< + +distclean clean: + rm -Rf *.o format.c format format.ps diff --git a/examples/format/format.rl b/examples/format/format.rl new file mode 100644 index 00000000..ea5fdfb5 --- /dev/null +++ b/examples/format/format.rl @@ -0,0 +1,191 @@ +/* + * Partial printf implementation. + */ + +#define BUFLEN 1024 +#include <stdio.h> + +typedef void (*WriteFunc)( char *data, int len ); + +struct format +{ + char buf[BUFLEN+1]; + int buflen; + WriteFunc write; + + int flags; + int width; + int prec; + int cs; +}; + +void do_conv( struct format *fsm, char c ) +{ + printf( "flags: %x\n", fsm->flags ); + printf( "width: %i\n", fsm->width ); + printf( "prec: %i\n", fsm->prec ); + printf( "conv: %c\n", c ); + printf( "\n" ); +} + +#define FL_HASH 0x01 +#define FL_ZERO 0x02 +#define FL_DASH 0x04 +#define FL_SPACE 0x08 +#define FL_PLUS 0x10 + +#define FL_HAS_WIDTH 0x0100 +#define FL_WIDTH_ARG 0x0200 +#define FL_HAS_PREC 0x0400 +#define FL_PREC_ARG 0x0800 + +#define FL_LEN_H 0x010000 +#define FL_LEN_HH 0x020000 +#define FL_LEN_L 0x040000 +#define FL_LEN_LL 0x080000 + +%%{ + machine format; + access fsm->; + + action clear { + fsm->flags = 0; + fsm->width = 0; + fsm->prec = 0; + } + + # A non-zero number. 
+ nznum = [1-9] [0-9]*; + + # Width + action width_num { fsm->width = 10 * fsm->width + (fc-'0'); } + action width_arg { fsm->flags |= FL_WIDTH_ARG; } + action width { fsm->flags |= FL_HAS_WIDTH; } + width = ( ( nznum $width_num | '*' @width_arg ) %width )?; + + # Precision + action prec_num { fsm->prec = 10 * fsm->prec + (fc-'0'); } + action prec_arg { fsm->flags |= FL_PREC_ARG; } + action prec { fsm->flags |= FL_HAS_PREC; } + precision = ( '.' ( digit* $prec_num %prec | '*' @prec_arg ) )?; + + # Flags + action flags_hash { fsm->flags |= FL_HASH; } + action flags_zero { fsm->flags |= FL_ZERO; } + action flags_dash { fsm->flags |= FL_DASH; } + action flags_space { fsm->flags |= FL_SPACE; } + action flags_plus { fsm->flags |= FL_PLUS; } + + flags = ( + '#' @flags_hash | + '0' @flags_zero | + '-' @flags_dash | + ' ' @flags_space | + '+' @flags_plus )*; + + action length_h { fsm->flags |= FL_LEN_H; } + action length_l { fsm->flags |= FL_LEN_L; } + action length_hh { fsm->flags |= FL_LEN_HH; } + action length_ll { fsm->flags |= FL_LEN_LL; } + + # Must use leaving transitions on 'h' and 'l' because they are + # prefixes for 'hh' and 'll'. 
+ length = ( + 'h' %length_h | + 'l' %length_l | + 'hh' @length_hh | + 'll' @length_ll )?; + + action conversion { + do_conv( fsm, fc ); + } + + conversion = [diouxXcsp] @conversion; + + fmt_spec = + '%' @clear + flags + width + precision + length + conversion; + + action emit { + if ( fsm->buflen == BUFLEN ) { + fsm->write( fsm->buf, fsm->buflen ); + fsm->buflen = 0; + } + fsm->buf[fsm->buflen++] = fc; + } + + action finish_ok { + if ( fsm->buflen > 0 ) + fsm->write( fsm->buf, fsm->buflen ); + } + action finish_err { + printf("EOF IN FORMAT\n"); + } + action err_char { + printf("ERROR ON CHAR: 0x%x\n", fc ); + } + + main := ( + [^%] @emit | + '%%' @emit | + fmt_spec + )* @/finish_err %/finish_ok $!err_char; +}%% + +%% write data; + +void format_init( struct format *fsm ) +{ + fsm->buflen = 0; + %% write init; +} + +void format_execute( struct format *fsm, const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; +} + +int format_finish( struct format *fsm ) +{ + %% write eof; + + if ( fsm->cs == format_error ) + return -1; + if ( fsm->cs >= format_first_final ) + return 1; + return 0; +} + + +#define INPUT_BUFSIZE 2048 + +struct format fsm; +char buf[INPUT_BUFSIZE]; + +void write(char *data, int len ) +{ + fwrite( data, 1, len, stdout ); +} + +int main() +{ + fsm.write = write; + format_init( &fsm ); + while ( 1 ) { + int len = fread( buf, 1, INPUT_BUFSIZE, stdin ); + format_execute( &fsm, buf, len ); + if ( len != INPUT_BUFSIZE ) + break; + } + if ( format_finish( &fsm ) <= 0 ) + printf("FAIL\n"); + return 0; +} + diff --git a/examples/gotocallret/Makefile b/examples/gotocallret/Makefile new file mode 100644 index 00000000..13f9818d --- /dev/null +++ b/examples/gotocallret/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: gotocallret + +ps: gotocallret.ps + +gotocallret: gotocallret.o + g++ -g -o gotocallret gotocallret.o + +gotocallret.cpp: gotocallret.rl $(RAGEL) 
$(RLCODEGEN) + $(RAGEL) gotocallret.rl | $(RLCODEGEN) -G2 -o gotocallret.cpp + +gotocallret.o: gotocallret.cpp + g++ -Wall -g -c -O3 -o $@ $< + +gotocallret.ps: gotocallret.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) gotocallret.rl | $(RLCODEGEN) -V | dot -Tps > gotocallret.ps + +distclean clean: + rm -Rf *.o gotocallret.cpp gotocallret gotocallret.ps diff --git a/examples/gotocallret/gotocallret.rl b/examples/gotocallret/gotocallret.rl new file mode 100644 index 00000000..84384a9c --- /dev/null +++ b/examples/gotocallret/gotocallret.rl @@ -0,0 +1,103 @@ +/* + * Demonstrate the use of goto, call and return. This machine expects either a + * lower case char or a digit as a command then a space followed by the command + * arg. If the command is a char, then the arg must be a string of chars. + * If the command is a digit, then the arg must be a string of digits. This + * choice is determined by action code, rather than through transition + * destinations. + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +struct GotoCallRet +{ + char comm; + int cs, top, stack[32]; + + int init( ); + int execute( const char *data, int len ); + int finish( ); +}; + +%%{ + machine GotoCallRet; + + # Error machine, consumes to end of + # line, then starts the main line over. + garble_line := ( + (any-'\n')*'\n' + ) >{cout << "error: garbling line" << endl;} @{fgoto main;}; + + # Look for a string of alphas or of digits, + # on anything else, hold the character and return. + alp_comm := alpha+ $!{fhold;fret;}; + dig_comm := digit+ $!{fhold;fret;}; + + # Choose which machine to call into based on the command. + action comm_arg { + if ( comm >= 'a' ) + fcall alp_comm; + else + fcall dig_comm; + } + + # Specifies command string. Note that the arg is left out. + command = ( + [a-z0-9] @{comm = fc;} ' ' @comm_arg '\n' + ) @{cout << "correct command" << endl;}; + + # Any number of commands. If there is an + # error anywhere, garble the line. 
+ main := command* $!{fhold;fgoto garble_line;}; +}%% + +%% write data; + +int GotoCallRet::init( ) +{ + %% write init; + return 1; +} + +int GotoCallRet::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; + if ( cs == GotoCallRet_error ) + return -1; + if ( cs >= GotoCallRet_first_final ) + return 1; + return 0; +} + +int GotoCallRet::finish( ) +{ + %% write eof; + if ( cs == GotoCallRet_error ) + return -1; + if ( cs >= GotoCallRet_first_final ) + return 1; + return 0; +} + +#define BUFSIZE 1024 + +int main() +{ + char buf[BUFSIZE]; + + GotoCallRet gcr; + gcr.init(); + while ( fgets( buf, sizeof(buf), stdin ) != 0 ) { + gcr.execute( buf, strlen(buf) ); + } + if ( gcr.finish() <= 0 ) + cerr << "gotocallret: error: parsing input" << endl; + return 0; +} diff --git a/examples/mailbox/Makefile b/examples/mailbox/Makefile new file mode 100644 index 00000000..94d66800 --- /dev/null +++ b/examples/mailbox/Makefile @@ -0,0 +1,16 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: mailbox + +mailbox: mailbox.o + g++ -g -o mailbox mailbox.o + +mailbox.cpp: mailbox.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) mailbox.rl | $(RLCODEGEN) -G2 -o mailbox.cpp + +%.o: %.cpp + g++ -Wall -g -c -O3 -o $@ $< + +distclean clean: + rm -Rf *.o mailbox.cpp mailbox mailbox.ps diff --git a/examples/mailbox/mailbox.rl b/examples/mailbox/mailbox.rl new file mode 100644 index 00000000..74e33108 --- /dev/null +++ b/examples/mailbox/mailbox.rl @@ -0,0 +1,206 @@ +/* + * Parses unix mail boxes into headers and bodies. + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +#define BUFSIZE 2048 + +/* A growable buffer for collecting headers. 
*/ +struct Buffer +{ + Buffer() : data(0), allocated(0), length(0) { } + ~Buffer() { empty(); } + + void append( char p ) { + if ( ++length > allocated ) + upAllocate( length*2 ); + data[length-1] = p; + } + + void clear() { length = 0; } + void upAllocate( int len ); + void empty(); + + char *data; + int allocated; + int length; +}; + + +struct MailboxScanner +{ + Buffer headName; + Buffer headContent; + + int cs, top, stack[1]; + + int init( ); + int execute( const char *data, int len ); + int finish( ); +}; + +%%{ + machine MailboxScanner; + + # Buffer the header names. + action bufHeadName { headName.append(fc); } + + # Prints a blank line after the end of the headers of each message. + action blankLine { cout << endl; } + + # Helpers we will use in matching the date section of the from line. + day = /[A-Z][a-z][a-z]/; + month = /[A-Z][a-z][a-z]/; + year = /[0-9][0-9][0-9][0-9]/; + time = /[0-9][0-9]:[0-9][0-9]/ . ( /:[0-9][0-9]/ | '' ); + letterZone = /[A-Z][A-Z][A-Z]/; + numZone = /[+\-][0-9][0-9][0-9][0-9]/; + zone = letterZone | numZone; + dayNum = /[0-9 ][0-9]/; + + # These are the different formats of the date minus an obscure + # type that has a funny string 'remote from xxx' on the end. Taken + # from c-client in the imap-2000 distribution. + date = day . ' ' . month . ' ' . dayNum . ' ' . time . ' ' . + ( year | year . ' ' . zone | zone . ' ' . year ); + + # From lines separate messages. We will exclude fromLine from a message + # body line. This will cause us to stay in message line up until an + # entirely correct from line is matched. + fromLine = 'From ' . (any-'\n')* . ' ' . date . '\n'; + + # The types of characters that can be used as a header name. + hchar = print - [ :]; + + # Simply eat up an uninteresting header. Return at the first non-ws + # character following a newline. 
+ consumeHeader := ( + [^\n] | + '\n' [ \t] | + '\n' [^ \t] @{fhold; fret;} + )*; + + action hchar {headContent.append(fc);} + action hspace {headContent.append(' ');} + + action hfinish { + headContent.append(0); + cout << headContent.data << endl; + headContent.clear(); + fhold; + fret; + } + + # Display the contents of a header as it is consumed. Collapses line + # continuations to a single space. + printHeader := ( + [^\n] @hchar | + ( '\n' ( [ \t]+ '\n' )* [ \t]+ ) %hspace + )** $!hfinish; + + action onHeader + { + headName.append(0); + if ( strcmp( headName.data, "From" ) == 0 || + strcmp( headName.data, "To" ) == 0 || + strcmp( headName.data, "Subject" ) == 0 ) + { + /* Print the header name, then jump to a machine that will display + * the contents. */ + cout << headName.data << ":"; + headName.clear(); + fcall printHeader; + } + + headName.clear(); + fcall consumeHeader; + } + + header = hchar+ $bufHeadName ':' @onHeader; + + # Exclude fromLine from a messageLine, otherwise when encountering a + # fromLine we will be simultaneously matching the old message and a new + # message. + messageLine = ( [^\n]* '\n' - fromLine ); + + # An entire message. + message = ( fromLine . header* . '\n' @blankLine . messageLine* ); + + # File is a series of messages. 
+ main := message*; +}%% + +%% write data; + +int MailboxScanner::init( ) +{ + %% write init; + return 1; +} + +int MailboxScanner::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; + + if ( cs == MailboxScanner_error ) + return -1; + if ( cs >= MailboxScanner_first_final ) + return 1; + return 0; +} + +int MailboxScanner::finish( ) +{ + %% write eof; + if ( cs == MailboxScanner_error ) + return -1; + if ( cs >= MailboxScanner_first_final ) + return 1; + return 0; +} + + +void Buffer::empty() +{ + if ( data != 0 ) { + free( data ); + + data = 0; + length = 0; + allocated = 0; + } +} + +void Buffer::upAllocate( int len ) +{ + if ( data == 0 ) + data = (char*) malloc( len ); + else + data = (char*) realloc( data, len ); + allocated = len; +} + +MailboxScanner mailbox; +char buf[BUFSIZE]; + +int main() +{ + mailbox.init(); + while ( 1 ) { + int len = fread( buf, 1, BUFSIZE, stdin ); + mailbox.execute( buf, len ); + if ( len != BUFSIZE ) + break; + } + if ( mailbox.finish() <= 0 ) + cerr << "mailbox: error parsing input" << endl; + return 0; +} diff --git a/examples/params/Makefile b/examples/params/Makefile new file mode 100644 index 00000000..98b950ca --- /dev/null +++ b/examples/params/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: params + +ps: params.ps + +params: params.o + gcc -g -o params params.o + +params.c: params.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) params.rl | $(RLCODEGEN) -G2 -o params.c + +params.ps: params.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) params.rl | $(RLCODEGEN) -V | dot -Tps > params.ps + +%.o: %.c + gcc -Wall -O3 -g -c -o $@ $< + +distclean clean: + rm -Rf *.o params.c params params.ps diff --git a/examples/params/params.rl b/examples/params/params.rl new file mode 100644 index 00000000..3cf908ff --- /dev/null +++ b/examples/params/params.rl @@ -0,0 +1,104 @@ +/* + * Parse command line arguments. 
+ */ + +#include <stdio.h> +#include <string.h> + +#define BUFLEN 1024 + +struct params +{ + char buffer[BUFLEN+1]; + int buflen; + int cs; +}; + +%%{ + machine params; + access fsm->; + + # A buffer to collect arguments + + # Append to the buffer. + action append { + if ( fsm->buflen < BUFLEN ) + fsm->buffer[fsm->buflen++] = fc; + } + + # Terminate a buffer. + action term { + if ( fsm->buflen < BUFLEN ) + fsm->buffer[fsm->buflen++] = 0; + } + + # Clear out the buffer + action clear { fsm->buflen = 0; } + + action help { printf("help\n"); } + action version { printf("version\n"); } + action output { printf("output: \"%s\"\n", fsm->buffer); } + action spec { printf("spec: \"%s\"\n", fsm->buffer); } + action mach { printf("machine: \"%s\"\n", fsm->buffer); } + + # Helpers that collect strings + string = [^\0]+ >clear $append %term; + + # Different arguments. + help = ( '-h' | '-H' | '-?' | '--help' ) 0 @help; + version = ( '-v' | '--version' ) 0 @version; + output = '-o' 0? string 0 @output; + spec = '-S' 0? string 0 @spec; + mach = '-M' 0? 
string 0 @mach; + + main := ( + help | + version | + output | + spec | + mach + )*; +}%% + +%% write data; + +void params_init( struct params *fsm ) +{ + fsm->buflen = 0; + %% write init; +} + +void params_execute( struct params *fsm, const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; +} + +int params_finish( struct params *fsm ) +{ + %% write eof; + + if ( fsm->cs == params_error ) + return -1; + if ( fsm->cs >= params_first_final ) + return 1; + return 0; +} + +#define BUFSIZE 2048 + +int main( int argc, char **argv ) +{ + int a; + struct params params; + + params_init( &params ); + for ( a = 1; a < argc; a++ ) + params_execute( &params, argv[a], strlen(argv[a])+1 ); + if ( params_finish( &params ) != 1 ) + fprintf( stderr, "params: error processing arguments\n" ); + + return 0; +} diff --git a/examples/pullscan/Makefile b/examples/pullscan/Makefile new file mode 100644 index 00000000..1a048ea1 --- /dev/null +++ b/examples/pullscan/Makefile @@ -0,0 +1,23 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +CFLAGS = -Wall -g -O3 + +all: pullscan + +ps: pullscan.ps + +pullscan: pullscan.o + g++ -g -o $@ $< + +pullscan.c: pullscan.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) pullscan.rl | $(RLCODEGEN) -G2 -o $@ + +%.o: %.c + gcc $(CFLAGS) -c -o $@ $< + +pullscan.ps: pullscan.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) pullscan.rl | $(RLCODEGEN) -V | dot -Tps > pullscan.ps + +distclean clean: + rm -Rf *.o pullscan.c pullscan pullscan.ps diff --git a/examples/pullscan/pullscan.rl b/examples/pullscan/pullscan.rl new file mode 100644 index 00000000..79e3c499 --- /dev/null +++ b/examples/pullscan/pullscan.rl @@ -0,0 +1,166 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define BUFSIZE 4096 + +typedef struct _Scanner { + /* Scanner state. 
*/ + int cs; + int act; + int have; + int curline; + char *tokstart; + char *tokend; + char *p; + char *pe; + FILE *file; + int done; + + /* Token data */ + char *data; + int len; + int value; + + char buf[BUFSIZE]; +} Scanner; + + +void scan_init( Scanner *s, FILE *file ) +{ + memset (s, '\0', sizeof(Scanner)); + s->curline = 1; + s->file = file; +} + +#define TK_NO_TOKEN (-1) +#define TK_ERR 128 +#define TK_EOF 129 +#define TK_Identifier 130 +#define TK_Number 131 + + +%%{ + machine Scanner; + write data; +}%% + +#define ret_tok( _tok ) token = _tok; s->data = s->tokstart + +int scan( Scanner *s ) +{ + char *p = s->p; + char *pe = s->pe; + int token = TK_NO_TOKEN; + int space, readlen; + + while ( 1 ) { + if ( p == pe ) { + printf("scanner: need more data\n"); + + if ( s->tokstart == 0 ) + s->have = 0; + else { + /* There is data that needs to be shifted over. */ + printf("scanner: buffer broken mid token\n"); + s->have = pe - s->tokstart; + memmove( s->buf, s->tokstart, s->have ); + s->tokend -= (s->tokstart-s->buf); + s->tokstart = s->buf; + } + + p = s->buf + s->have; + space = BUFSIZE - s->have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. */ + printf("scanner: out of buffer space\n"); + return TK_ERR; + } + + if ( s->done ) { + printf("scanner: end of file\n"); + p[0] = 0; + readlen = 1; + } + else { + readlen = fread( p, 1, space, s->file ); + if ( readlen < space ) + s->done = 1; + } + + pe = p + readlen; + } + + %%{ + machine Scanner; + access s->; + + main := |* + + # Identifiers + ( [a-zA-Z_] [a-zA-Z0-9_]* ) => + { ret_tok( TK_Identifier ); fbreak; }; + + # Whitespace + [ \t\n]; + + # Number + digit+ => + { ret_tok( TK_Number ); fbreak; }; + + # EOF + 0 => + { ret_tok( TK_EOF ); fbreak; }; + + # Anything else + any => + { ret_tok( *p ); fbreak; }; + + *|; + + write exec; + }%% + + if ( s->cs == Scanner_error ) + return TK_ERR; + + if ( token != TK_NO_TOKEN ) { + /* Save p and pe. fbreak does not advance p. 
*/ + s->p = p + 1; + s->pe = pe; + s->len = s->p - s->data; + return token; + } + } +} + + +int main (int argc, char** argv) +{ + Scanner ss; + int tok; + + scan_init(&ss, stdin); + + while ( 1 ) { + tok = scan (&ss); + if ( tok == TK_EOF ) { + printf ("parser: EOF\n"); + break; + } + else if ( tok == TK_ERR ) { + printf ("parser: ERR\n"); + break; + } + else { + printf ("parser: %d \"", tok); + fwrite ( ss.data, 1, ss.len, stdout ); + printf ("\"\n" ); + } + } + + return 0; +} + + diff --git a/examples/rlscan/Makefile b/examples/rlscan/Makefile new file mode 100644 index 00000000..2021d27c --- /dev/null +++ b/examples/rlscan/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: rlscan + +ps: rlscan.ps + +rlscan: rlscan.o + g++ -g -o rlscan rlscan.o + +rlscan.cpp: rlscan.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) rlscan.rl | $(RLCODEGEN) -G2 -o rlscan.cpp + +%.o: %.cpp + g++ -Wall -g -c -O3 -o $@ $< + +rlscan.ps: rlscan.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) rlscan.rl | $(RLCODEGEN) -V | dot -Tps > rlscan.ps + +distclean clean: + rm -Rf *.o rlscan.cpp rlscan rlscan.ps diff --git a/examples/rlscan/rlscan.rl b/examples/rlscan/rlscan.rl new file mode 100644 index 00000000..f912b8d8 --- /dev/null +++ b/examples/rlscan/rlscan.rl @@ -0,0 +1,298 @@ +/* + * Lexes Ragel input files. 
+ */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +void escapeXML( char *data ) +{ + while ( *data != 0 ) { + switch ( *data ) { + case '<': cout << "&lt;"; break; + case '>': cout << "&gt;"; break; + case '&': cout << "&amp;"; break; + default: cout << *data; break; + } + data += 1; + } +} + +void escapeXML( char c ) +{ + switch ( c ) { + case '<': cout << "&lt;"; break; + case '>': cout << "&gt;"; break; + case '&': cout << "&amp;"; break; + default: cout << c; break; + } +} + +void escapeXML( char *data, int len ) +{ + for ( char *end = data + len; data != end; data++ ) { + switch ( *data ) { + case '<': cout << "&lt;"; break; + case '>': cout << "&gt;"; break; + case '&': cout << "&amp;"; break; + default: cout << *data; break; + } + } +} + +inline void write( char *data ) +{ + cout << data; +} + +inline void write( char c ) +{ + cout << c; +} + +inline void write( char *data, int len ) +{ + cout.write( data, len ); +} + + +%%{ + machine RagelScan; + + word = [a-zA-Z_][a-zA-Z_0-9]*; + integer = [0-9]+; + hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*; + + default = ^0; + EOF = 0; + + # Handles comments in outside code and inline blocks. + c_comment := + ( default* :>> '*/' ) + ${ escapeXML( fc ); } + @{ fret; }; + + action emit { + escapeXML( tokstart, tokend-tokstart ); + } + + # + # Inline action code + # + + ilscan := |* + + "'" ( [^'\\] | /\\./ )* "'" => emit; + '"' ( [^"\\] | /\\./ )* '"' => emit; + '/*' { + write( "/*" ); + fcall c_comment; + }; + '//' [^\n]* '\n' => emit; + + '{' { + write( '{' ); + inline_depth += 1; + }; + + '}' { + write( '}' ); + /* If dropping down to the last } then return + * to ragel code. 
*/ + if ( --inline_depth == 0 ) { + write( "</inline>\n" ); + fgoto rlscan; + } + }; + + default => { escapeXML( *tokstart ); }; + *|; + + # + # Ragel Tokens + # + + rlscan := |* + '}%%' { + if ( !single_line ) { + write( "</section>\n" ); + fgoto main; + } + }; + + '\n' { + if ( single_line ) { + write( "</section>\n" ); + fgoto main; + } + }; + + # Word + word { + write( "<word>" ); + write( tokstart, tokend-tokstart ); + write( "</word>\n" ); + }; + + # Decimal integer. + integer { + write( "<int>" ); + write( tokstart, tokend-tokstart ); + write( "</int>\n" ); + }; + + # Hexadecimal integer. + hex { + write( "<hex>" ); + write( tokstart, tokend-tokstart ); + write( "</hex>\n" ); + }; + + # Consume comments. + '#' [^\n]* '\n'; + + # Single literal string. + "'" ( [^'\\] | /\\./ )* "'" { + write( "<single_lit>" ); + escapeXML( tokstart, tokend-tokstart ); + write( "</single_lit>\n" ); + }; + + # Double literal string. + '"' ( [^"\\] | /\\./ )* '"' { + write( "<double_lit>" ); + escapeXML( tokstart, tokend-tokstart ); + write( "</double_lit>\n" ); + }; + + # Or literal. + '[' ( [^\]\\] | /\\./ )* ']' { + write( "<or_lit>" ); + escapeXML( tokstart, tokend-tokstart ); + write( "</or_lit>\n" ); + }; + + # Regex Literal. + '/' ( [^/\\] | /\\./ ) * '/' { + write( "<re_lit>" ); + escapeXML( tokstart, tokend-tokstart ); + write( "</re_lit>\n" ); + }; + + # Open an inline block + '{' { + inline_depth = 1; + write( "<inline>{" ); + fgoto ilscan; + }; + + punct { + write( "<symbol>" ); + escapeXML( fc ); + write( "</symbol>\n" ); + }; + + default; + *|; + + # + # Outside code. 
+ # + + main := |* + + "'" ( [^'\\] | /\\./ )* "'" => emit; + '"' ( [^"\\] | /\\./ )* '"' => emit; + + '/*' { + escapeXML( tokstart, tokend-tokstart ); + fcall c_comment; + }; + + '//' [^\n]* '\n' => emit; + + '%%{' { + write( "<section>\n" ); + single_line = false; + fgoto rlscan; + }; + + '%%' { + write( "<section>\n" ); + single_line = true; + fgoto rlscan; + }; + + default { + escapeXML( *tokstart ); + }; + + # EOF. + EOF; + *|; +}%% + +%% write data nofinal; + +#define BUFSIZE 2048 + +int main() +{ + std::ios::sync_with_stdio(false); + + int cs, act; + char *tokstart, *tokend; + int stack[1], top; + + static char inbuf[BUFSIZE]; + bool single_line = false; + int inline_depth = 0; + + %% write init; + + bool done = false; + int have = 0; + while ( !done ) { + /* How much space is in the buffer? */ + int space = BUFSIZE - have; + if ( space == 0 ) { + /* Buffer is full. */ + cerr << "TOKEN TOO BIG" << endl; + exit(1); + } + + /* Read in a block. */ + char *p = inbuf + have; + cin.read( p, space ); + int len = cin.gcount(); + + /* Check for EOF. */ + if ( len == 0 ) { + p[0] = 0, len++; + done = true; + } + + char *pe = p + len; + %% write exec; + + if ( cs == RagelScan_error ) { + /* Machine failed before finding a token. */ + cerr << "PARSE ERROR" << endl; + exit(1); + } + + if ( tokstart == 0 ) + have = 0; + else { + /* There is a prefix to preserve, shift it over. 
*/ + have = pe - tokstart; + memmove( inbuf, tokstart, have ); + tokend = inbuf + (tokend-tokstart); + tokstart = inbuf; + } + } + return 0; +} diff --git a/examples/statechart/Makefile b/examples/statechart/Makefile new file mode 100644 index 00000000..3dec7fd3 --- /dev/null +++ b/examples/statechart/Makefile @@ -0,0 +1,21 @@ +RAGEL = ../../ragel/ragel +RLCODEGEN = ../../rlcodegen/rlcodegen + +all: statechart + +ps: statechart.ps + +statechart: statechart.o + g++ -g -o statechart statechart.o + +statechart.cpp: statechart.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) statechart.rl | $(RLCODEGEN) -G2 -o statechart.cpp + +statechart.o: statechart.cpp + g++ -Wall -g -c -O3 -o $@ $< + +statechart.ps: statechart.rl $(RAGEL) $(RLCODEGEN) + $(RAGEL) statechart.rl | $(RLCODEGEN) -V | dot -Tps > statechart.ps + +distclean clean: + rm -Rf *.o statechart.cpp statechart statechart.ps diff --git a/examples/statechart/statechart.rl b/examples/statechart/statechart.rl new file mode 100644 index 00000000..cb99a203 --- /dev/null +++ b/examples/statechart/statechart.rl @@ -0,0 +1,114 @@ +/* + * Demonstrate the use of labels, the epsilon operator, and the join operator + * for creating machines using the named state and transition list paradigm. + * This implements the same machine as the atoi example. + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> + +using namespace std; + +struct StateChart +{ + bool neg; + int val; + int cs; + + int init( ); + int execute( const char *data, int len ); + int finish( ); +}; + +%%{ + machine StateChart; + + action begin { + neg = false; + val = 0; + } + + action see_neg { + neg = true; + } + + action add_digit { + val = val * 10 + (fc - '0'); + } + + action finish { + if ( neg ) + val = -1 * val; + } + + atoi = ( + start: ( + '-' @see_neg ->om_num | + '+' ->om_num | + [0-9] @add_digit ->more_nums + ), + + # One or more nums. + om_num: ( + [0-9] @add_digit ->more_nums + ), + + # Zero or more nums. 
+ more_nums: ( + [0-9] @add_digit ->more_nums | + '' -> final + ) + ) >begin %finish; + + main := ( atoi '\n' @{ cout << val << endl; } )*; +}%% + +%% write data; + +int StateChart::init( ) +{ + %% write init; + return 1; +} + +int StateChart::execute( const char *data, int len ) +{ + const char *p = data; + const char *pe = data + len; + + %% write exec; + + if ( cs == StateChart_error ) + return -1; + if ( cs >= StateChart_first_final ) + return 1; + return 0; +} + +int StateChart::finish( ) +{ + %% write eof; + if ( cs == StateChart_error ) + return -1; + if ( cs >= StateChart_first_final ) + return 1; + return 0; +} + + +#define BUFSIZE 1024 + +int main() +{ + char buf[BUFSIZE]; + + StateChart atoi; + atoi.init(); + while ( fgets( buf, sizeof(buf), stdin ) != 0 ) { + atoi.execute( buf, strlen(buf) ); + } + if ( atoi.finish() <= 0 ) + cerr << "statechart: error: parsing input" << endl; + return 0; +} diff --git a/examples/uri/uri.rl b/examples/uri/uri.rl new file mode 100644 index 00000000..185a76c6 --- /dev/null +++ b/examples/uri/uri.rl @@ -0,0 +1,31 @@ +%%{ + machine uri; + + action scheme {} + action loc {} + action item {} + action query {} + action last {} + action nothing {} + + main := + # Scheme machine. This is ambiguous with the item machine. We commit + # to the scheme machine on colon. + ( [^:/?#]+ ':' @(colon,1) @scheme )? + + # Location machine. This is ambiguous with the item machine. We remain + # ambiguous until a second slash, at that point and all points after + # we place a higher priority on staying in the location machine over + # moving into the item machine. + ( ( '/' ( '/' [^/?#]* ) $(loc,1) ) %loc %/loc )? + + # Item machine. Ambiguous with both scheme and location, which both + # get a higher priority on the characters causing ambiguity. + ( ( [^?#]+ ) $(loc,0) $(colon,0) %item %/item )? 
+ + # Last two components, the characters that initiate these machines are + # not supported in any previous components, therefore there are no + # ambiguities introduced by these parts. + ( '?' [^#]* %query %/query)? + ( '#' any* %/last )?; +}%% |