From bbf93e620923bc5a0dac5e890f37ee693b31d806 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 10 Oct 2020 11:52:31 -0700 Subject: removed ragel examples left over from time repos were one --- examples/CMakeLists.txt | 11 -- examples/Makefile.am | 72 -------- examples/README | 40 ----- examples/atoi.rl | 59 ------ examples/awkemu.rl | 116 ------------ examples/awkequiv.awk | 10 -- examples/clang.rl | 150 ---------------- examples/concurrent.rl | 126 ------------- examples/cppscan.lex | 143 --------------- examples/cppscan.rec | 183 ------------------- examples/cppscan.rl | 208 ---------------------- examples/format.rl | 191 -------------------- examples/go/.gitignore | 5 - examples/go/Makefile | 32 ---- examples/go/README | 36 ---- examples/go/atoi.rl | 89 ---------- examples/go/rpn.rl | 159 ----------------- examples/go/url.rl | 414 ------------------------------------------- examples/go/url_authority.rl | 165 ----------------- examples/gotocallret.rl | 96 ---------- examples/mailbox.rl | 207 ---------------------- examples/params.rl | 102 ----------- examples/pullscan.rl | 170 ------------------ examples/rlscan.rl | 300 ------------------------------- examples/statechart.rl | 116 ------------ examples/uri.rl | 31 ---- 26 files changed, 3231 deletions(-) delete mode 100644 examples/CMakeLists.txt delete mode 100644 examples/Makefile.am delete mode 100644 examples/README delete mode 100644 examples/atoi.rl delete mode 100644 examples/awkemu.rl delete mode 100755 examples/awkequiv.awk delete mode 100644 examples/clang.rl delete mode 100644 examples/concurrent.rl delete mode 100644 examples/cppscan.lex delete mode 100644 examples/cppscan.rec delete mode 100644 examples/cppscan.rl delete mode 100644 examples/format.rl delete mode 100644 examples/go/.gitignore delete mode 100644 examples/go/Makefile delete mode 100644 examples/go/README delete mode 100644 examples/go/atoi.rl delete mode 100644 examples/go/rpn.rl delete mode 100644 examples/go/url.rl delete 
mode 100644 examples/go/url_authority.rl delete mode 100644 examples/gotocallret.rl delete mode 100644 examples/mailbox.rl delete mode 100644 examples/params.rl delete mode 100644 examples/pullscan.rl delete mode 100644 examples/rlscan.rl delete mode 100644 examples/statechart.rl delete mode 100644 examples/uri.rl (limited to 'examples') diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt deleted file mode 100644 index 6ff75544..00000000 --- a/examples/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -foreach(_example atoi awkemu clang concurrent cppscan format gotocallret - mailbox params rlscan statechart pullscan) - add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${_example}.cpp" - DEPENDS ${_example}.rl - COMMAND ragel - ARGS -G2 -o "${CMAKE_CURRENT_BINARY_DIR}/${_example}.cpp" - "${CMAKE_CURRENT_LIST_DIR}/${_example}.rl" - WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}") - add_executable(${_example} "${CMAKE_CURRENT_BINARY_DIR}/${_example}.cpp") -endforeach() diff --git a/examples/Makefile.am b/examples/Makefile.am deleted file mode 100644 index 7cda0171..00000000 --- a/examples/Makefile.am +++ /dev/null @@ -1,72 +0,0 @@ - -RAGEL = ../src/ragel -FLEX = flex -RE2C = re2c - -noinst_PROGRAMS = \ - atoi concurrent cppscan format gotocallret mailbox params \ - statechart - -EXTRA_DIST = \ - gotocallret.rl pullscan.rl concurrent.rl rlscan.rl statechart.rl \ - params.rl clang.rl cppscan.rl format.rl awkemu.rl mailbox.rl atoi.rl - -gotocallret_SOURCES = gotocallret.cpp -pullscan_SOURCES = pullscan.c -concurrent_SOURCES = concurrent.cpp -rlscan_SOURCES = rlscan.cpp -statechart_SOURCES = statechart.cpp -params_SOURCES = params.c -clang_SOURCES = clang.c -cppscan_SOURCES = cppscan.cpp -format_SOURCES = format.c -awkemu_SOURCES = awkemu.c -mailbox_SOURCES = mailbox.cpp -atoi_SOURCES = atoi.cpp - -gotocallret.cpp: gotocallret.rl - $(RAGEL) -G2 -o gotocallret.cpp gotocallret.rl - -pullscan.c: pullscan.rl $(RAGEL) - $(RAGEL) -G2 -o $@ pullscan.rl - 
-concurrent.cpp: concurrent.rl $(RAGEL) - $(RAGEL) -G2 -o concurrent.cpp concurrent.rl - -rlscan.cpp: rlscan.rl - $(RAGEL) -G2 -o rlscan.cpp rlscan.rl - -statechart.cpp: statechart.rl - $(RAGEL) -G2 -o statechart.cpp statechart.rl - -params.c: params.rl - $(RAGEL) -G2 -o params.c params.rl - -clang.c: clang.rl - $(RAGEL) -G2 -o clang.c clang.rl - -cppscan.cpp: cppscan.rl - $(RAGEL) -G2 -o $@ cppscan.rl - -format.c: format.rl - $(RAGEL) -G2 -o format.c format.rl - -awkemu.c: awkemu.rl - $(RAGEL) -G2 -o awkemu.c awkemu.rl - -mailbox.cpp: mailbox.rl - $(RAGEL) -G2 -o mailbox.cpp mailbox.rl - -atoi.cpp: atoi.rl - $(RAGEL) -G2 -o atoi.cpp atoi.rl - -### - -lex-cppscan.cpp: cppscan.lex - $(FLEX) -f -o $@ $< - -re2c-cppscan.cpp: cppscan.rec - $(RE2C) -s $< > $@ - -example.cpp: example.rec - $(RE2C) -s $< > $@ diff --git a/examples/README b/examples/README deleted file mode 100644 index 12773cb3..00000000 --- a/examples/README +++ /dev/null @@ -1,40 +0,0 @@ - - Ragel State Machine Compiler -- Examples - ======================================== - -atoi -- Converts a string to an integer. - -awkemu -- Perfoms the basic parsing that the awk program perfoms on input. - The awk equivalent to awkemu is in awkemu/awkequiv.awk - -clang -- A scanner for a simple C like language. It breaks input up into - words, numbers, strings and symbols and strips out whitespace - and comments. It is a suitable template for writing a parser - that finds a sequence of tokens. - -concurrent -- Demonstrates the ability of ragel to produce parsers that - perform independent tasks concurrently. - -cppscan -- A C++ scanner that uses the longest match scanning method. This - example differs from other examples of scanning. Each run of the - state machine matches one token. This method results in a - smaller state machine since the final kleene star is omitted and - therefore every state does not need to get all the transitions - of the start state. - -format -- Partial printf implementation. 
- -gotocallret -- Demonstrate the use of fgoto, fcall and fret. - -mailbox -- Parses unix mailbox files. It breaks files into messages, and - messages into headers and body. It demonstrates Ragel's ability - to make parsers for structured file formats. - -params -- Parses command line arguements. - -rlscan -- Lexes Ragel input files. - -statechart -- Demonstrate the use of labels, the epsilon operator, and the - join operator for creating machines using the named state and - transition list paradigm. This implementes the same machine as - the atoi example. diff --git a/examples/atoi.rl b/examples/atoi.rl deleted file mode 100644 index 7164b68d..00000000 --- a/examples/atoi.rl +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Convert a string to an integer. - */ - -#include -#include -#include - -%%{ - machine atoi; - write data; -}%% - -long long atoi( char *str ) -{ - char *p = str, *pe = str + strlen( str ); - int cs; - long long val = 0; - bool neg = false; - - %%{ - action see_neg { - neg = true; - } - - action add_digit { - val = val * 10 + (fc - '0'); - } - - main := - ( '-'@see_neg | '+' )? ( digit @add_digit )+ - '\n'; - - # Initialize and execute. - write init; - write exec; - }%% - - if ( neg ) - val = -1 * val; - - if ( cs < atoi_first_final ) - fprintf( stderr, "atoi: there was an error\n" ); - - return val; -}; - - -#define BUFSIZE 1024 - -int main() -{ - char buf[BUFSIZE]; - while ( fgets( buf, sizeof(buf), stdin ) != 0 ) { - long long value = atoi( buf ); - printf( "%lld\n", value ); - } - return 0; -} diff --git a/examples/awkemu.rl b/examples/awkemu.rl deleted file mode 100644 index 6615943d..00000000 --- a/examples/awkemu.rl +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Perform the basic line parsing of input performed by awk. 
- */ - -#include -#include -#include -#include - -%%{ - machine awkemu; - - action start_word { - ws[nwords] = fpc; - } - - action end_word { - we[nwords++] = fpc; - } - - action start_line { - nwords = 0; - ls = fpc; - } - - action end_line { - printf("endline(%i): ", nwords ); - fwrite( ls, 1, p - ls, stdout ); - printf("\n"); - - for ( i = 0; i < nwords; i++ ) { - printf(" word: "); - fwrite( ws[i], 1, we[i] - ws[i], stdout ); - printf("\n"); - } - } - - # Words in a line. - word = ^[ \t\n]+; - - # The whitespace separating words in a line. - whitespace = [ \t]; - - # The components in a line to break up. Either a word or a single char of - # whitespace. On the word capture characters. - blineElements = word >start_word %end_word | whitespace; - - # Star the break line elements. Just be careful to decrement the leaving - # priority as we don't want multiple character identifiers to be treated as - # multiple single char identifiers. - line = ( blineElements** '\n' ) >start_line @end_line; - - # Any number of lines. - main := line*; -}%% - -%% write data noerror nofinal; - -#define MAXWORDS 256 -#define BUFSIZE 4096 -char buf[BUFSIZE]; - -int main() -{ - int i, nwords = 0; - char *ls = 0; - char *ws[MAXWORDS]; - char *we[MAXWORDS]; - - int cs; - int have = 0; - - %% write init; - - while ( 1 ) { - char *p, *pe, *data = buf + have; - int len, space = BUFSIZE - have; - /* fprintf( stderr, "space: %i\n", space ); */ - - if ( space == 0 ) { - fprintf(stderr, "buffer out of space\n"); - exit(1); - } - - len = fread( data, 1, space, stdin ); - /* fprintf( stderr, "len: %i\n", len ); */ - if ( len == 0 ) - break; - - /* Find the last newline by searching backwards. This is where - * we will stop processing on this iteration. */ - p = buf; - pe = buf + have + len - 1; - while ( *pe != '\n' && pe >= buf ) - pe--; - pe += 1; - - /* fprintf( stderr, "running on: %i\n", pe - p ); */ - - %% write exec; - - /* How much is still in the buffer. 
*/ - have = data + len - pe; - if ( have > 0 ) - memmove( buf, pe, have ); - - /* fprintf(stderr, "have: %i\n", have ); */ - - if ( len < space ) - break; - } - - if ( have > 0 ) - fprintf(stderr, "input not newline terminated\n"); - return 0; -} diff --git a/examples/awkequiv.awk b/examples/awkequiv.awk deleted file mode 100755 index 9877dd36..00000000 --- a/examples/awkequiv.awk +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/awk -f -# - - -{ - print "endline(" NF "): " $0 - for ( i = 1; i <= NF; i++ ) { - print " word: " $i - } -} diff --git a/examples/clang.rl b/examples/clang.rl deleted file mode 100644 index 60491e5e..00000000 --- a/examples/clang.rl +++ /dev/null @@ -1,150 +0,0 @@ -/* - * A mini C-like language scanner. - */ - -#include -#include -#include - -%%{ - machine clang; - - newline = '\n' @{curline += 1;}; - any_count_line = any | newline; - - # Consume a C comment. - c_comment := any_count_line* :>> '*/' @{fgoto main;}; - - main := |* - - # Alpha numberic characters or underscore. - alnum_u = alnum | '_'; - - # Alpha charactres or underscore. - alpha_u = alpha | '_'; - - # Symbols. Upon entering clear the buffer. On all transitions - # buffer a character. Upon leaving dump the symbol. - ( punct - [_'"] ) { - printf( "symbol(%i): %c\n", curline, ts[0] ); - }; - - # Identifier. Upon entering clear the buffer. On all transitions - # buffer a character. Upon leaving, dump the identifier. - alpha_u alnum_u* { - printf( "ident(%i): ", curline ); - fwrite( ts, 1, te-ts, stdout ); - printf("\n"); - }; - - # Single Quote. - sliteralChar = [^'\\] | newline | ( '\\' . any_count_line ); - '\'' . sliteralChar* . '\'' { - printf( "single_lit(%i): ", curline ); - fwrite( ts, 1, te-ts, stdout ); - printf("\n"); - }; - - # Double Quote. - dliteralChar = [^"\\] | newline | ( '\\' any_count_line ); - '"' . dliteralChar* . 
'"' { - printf( "double_lit(%i): ", curline ); - fwrite( ts, 1, te-ts, stdout ); - printf("\n"); - }; - - # Whitespace is standard ws, newlines and control codes. - any_count_line - 0x21..0x7e; - - # Describe both c style comments and c++ style comments. The - # priority bump on tne terminator of the comments brings us - # out of the extend* which matches everything. - '//' [^\n]* newline; - - '/*' { fgoto c_comment; }; - - # Match an integer. We don't bother clearing the buf or filling it. - # The float machine overlaps with int and it will do it. - digit+ { - printf( "int(%i): ", curline ); - fwrite( ts, 1, te-ts, stdout ); - printf("\n"); - }; - - # Match a float. Upon entering the machine clear the buf, buffer - # characters on every trans and dump the float upon leaving. - digit+ '.' digit+ { - printf( "float(%i): ", curline ); - fwrite( ts, 1, te-ts, stdout ); - printf("\n"); - }; - - # Match a hex. Upon entering the hex part, clear the buf, buffer characters - # on every trans and dump the hex on leaving transitions. - '0x' xdigit+ { - printf( "hex(%i): ", curline ); - fwrite( ts, 1, te-ts, stdout ); - printf("\n"); - }; - - *|; -}%% - -%% write data nofinal; - -#define BUFSIZE 128 - -void scanner() -{ - static char buf[BUFSIZE]; - int cs, act, have = 0, curline = 1; - char *ts, *te = 0; - int done = 0; - - %% write init; - - while ( !done ) { - char *p = buf + have, *pe, *eof = 0; - int len, space = BUFSIZE - have; - - if ( space == 0 ) { - /* We've used up the entire buffer storing an already-parsed token - * prefix that must be preserved. */ - fprintf(stderr, "OUT OF BUFFER SPACE\n" ); - exit(1); - } - - len = fread( p, 1, space, stdin ); - pe = p + len; - - /* Check if this is the end of file. */ - if ( len < space ) { - eof = pe; - done = 1; - } - - %% write exec; - - if ( cs == clang_error ) { - fprintf(stderr, "PARSE ERROR\n" ); - break; - } - - if ( ts == 0 ) - have = 0; - else { - /* There is a prefix to preserve, shift it over. 
*/ - have = pe - ts; - memmove( buf, ts, have ); - te = buf + (te-ts); - ts = buf; - } - } -} - -int main() -{ - scanner(); - return 0; -} - diff --git a/examples/concurrent.rl b/examples/concurrent.rl deleted file mode 100644 index 224f9601..00000000 --- a/examples/concurrent.rl +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Show off concurrent abilities. - */ - -#include -#include -#include - -using namespace std; - -#define BUFSIZE 2048 - -struct Concurrent -{ - int cur_char; - int start_word; - int start_comment; - int start_literal; - - int cs; - - int init( ); - int execute( const char *data, int len, bool isEof ); - int finish( ); -}; - -%%{ - machine Concurrent; - - action next_char { - cur_char += 1; - } - - action start_word { - start_word = cur_char; - } - action end_word { - cout << "word: " << start_word << - " " << cur_char-1 << endl; - } - - action start_comment { - start_comment = cur_char; - } - action end_comment { - cout << "comment: " << start_comment << - " " << cur_char-1 << endl; - } - - action start_literal { - start_literal = cur_char; - } - action end_literal { - cout << "literal: " << start_literal << - " " << cur_char-1 << endl; - } - - # Count characters. - chars = ( any @next_char )*; - - # Words are non-whitespace. - word = ( any-space )+ >start_word %end_word; - words = ( ( word | space ) $1 %0 )*; - - # Finds C style comments. - comment = ( '/*' any* :>> '*/' ) >start_comment %end_comment; - comments = ( comment | any )**; - - # Finds single quoted strings. - literalChar = ( any - ['\\] ) | ( '\\' . any ); - literal = ('\'' literalChar* '\'' ) >start_literal %end_literal; - literals = ( ( literal | (any-'\'') ) $1 %0 )*; - - main := chars | words | comments | literals; -}%% - -%% write data; - -int Concurrent::init( ) -{ - %% write init; - cur_char = 0; - return 1; -} - -int Concurrent::execute( const char *data, int len, bool isEof ) -{ - const char *p = data; - const char *pe = data + len; - const char *eof = isEof ? 
pe : 0; - - %% write exec; - - if ( cs == Concurrent_error ) - return -1; - if ( cs >= Concurrent_first_final ) - return 1; - return 0; -} - -int Concurrent::finish( ) -{ - if ( cs == Concurrent_error ) - return -1; - if ( cs >= Concurrent_first_final ) - return 1; - return 0; -} - -Concurrent concurrent; -char buf[BUFSIZE]; - -int main() -{ - concurrent.init(); - while ( 1 ) { - int len = fread( buf, 1, BUFSIZE, stdin ); - concurrent.execute( buf, len, len != BUFSIZE ); - if ( len != BUFSIZE ) - break; - } - - if ( concurrent.finish() <= 0 ) - cerr << "concurrent: error parsing input" << endl; - return 0; -} diff --git a/examples/cppscan.lex b/examples/cppscan.lex deleted file mode 100644 index fb662538..00000000 --- a/examples/cppscan.lex +++ /dev/null @@ -1,143 +0,0 @@ -/* - * flex equivalent to cppscan.rl - */ - -%{ - -#include - -#define TK_Dlit 256 -#define TK_Slit 257 -#define TK_Float 258 -#define TK_Id 259 -#define TK_NameSep 260 -#define TK_Arrow 261 -#define TK_PlusPlus 262 -#define TK_MinusMinus 263 -#define TK_ArrowStar 264 -#define TK_DotStar 265 -#define TK_ShiftLeft 266 -#define TK_ShiftRight 267 -#define TK_IntegerDecimal 268 -#define TK_IntegerOctal 269 -#define TK_IntegerHex 270 -#define TK_EqualsEquals 271 -#define TK_NotEquals 272 -#define TK_AndAnd 273 -#define TK_OrOr 274 -#define TK_MultAssign 275 -#define TK_DivAssign 276 -#define TK_PercentAssign 277 -#define TK_PlusAssign 278 -#define TK_MinusAssign 279 -#define TK_AmpAssign 280 -#define TK_CaretAssign 281 -#define TK_BarAssign 282 -#define TK_DotDotDot 283 -#define TK_Whitespace 284 -#define TK_Comment 285 - -int line = 1, col = 1; - -void token( int tok, char *data, int len ) -{ - printf( "<%i> ", tok ); - for ( int i = 0; i < len; i++ ) - fputc( data[i], stdout ); - fputc( '\n', stdout ); - - /* Count newlines and columns. This code is here mainly for having some - * code in the token routine when commenting out the above output during - * performance testing. 
*/ - for ( int i = 0; i < len; i ++ ) { - if ( data[i] == '\n' ) { - line += 1; - col = 1; - } - else { - col += 1; - } - } -} - - -%} - -%x COMMENT - -FRACT_CONST [0-9]*\.[0-9]+|[0-9]+\. -EXPONENT [eE][+\-]?[0-9]+ -FLOAT_SUFFIX [flFL] - -%% - - /* Single and double literals. */ -L?\'([^\'\\\n]|\\.)*\' { - token( TK_Slit, yytext, yyleng ); -} - -L?\"([^\"\\\n]|\\.)*\" { - token( TK_Dlit, yytext, yyleng ); -} - -[a-zA-Z_][a-zA-Z0-9_]* { - token( TK_Id, yytext, yyleng ); -} - -{FRACT_CONST}{EXPONENT}?{FLOAT_SUFFIX}?|[0-9]+{EXPONENT}{FLOAT_SUFFIX}? { - token( TK_Float, yytext, yyleng ); -} - -(0|[1-9][0-9]*)[ulUL]{0,3} { - token( TK_IntegerDecimal, yytext, yyleng ); -} - -0[0-9]+[ulUL]{0,2} { - token( TK_IntegerOctal, yytext, yyleng ); -} - -0x[0-9a-fA-F]+[ulUL]{0,2} { - token( TK_IntegerHex, yytext, yyleng ); -} - -:: token( TK_NameSep, yytext, yyleng ); -== token( TK_EqualsEquals, yytext, yyleng ); -!= token( TK_NotEquals, yytext, yyleng ); -&& token( TK_AndAnd, yytext, yyleng ); -\|\| token( TK_OrOr, yytext, yyleng ); -\*= token( TK_MultAssign, yytext, yyleng ); -\/= token( TK_DivAssign, yytext, yyleng ); -%= token( TK_PercentAssign, yytext, yyleng ); -\+= token( TK_PlusAssign, yytext, yyleng ); --= token( TK_MinusAssign, yytext, yyleng ); -&= token( TK_AmpAssign, yytext, yyleng ); -^= token( TK_CaretAssign, yytext, yyleng ); -\|= token( TK_BarAssign, yytext, yyleng ); -\+\+ token( TK_PlusPlus, yytext, yyleng ); --- token( TK_MinusMinus, yytext, yyleng ); --> token( TK_Arrow, yytext, yyleng ); -->\* token( TK_ArrowStar, yytext, yyleng ); -\.\* token( TK_DotStar, yytext, yyleng ); -\.\.\. token( TK_DotDotDot, yytext, yyleng ); - -\/\* BEGIN(COMMENT); -\*\/ BEGIN(INITIAL); -(.|\n) { } - -\/\/.*\n {} -[^!-~]+ {} - -[!-/:-@\[-`{-~] token( yytext[0], yytext, yyleng ); - -%% - -int yywrap() -{ - /* Once the input is done, no more. 
*/ - return 1; -} - -int main() -{ - yylex(); -} diff --git a/examples/cppscan.rec b/examples/cppscan.rec deleted file mode 100644 index 43f297d8..00000000 --- a/examples/cppscan.rec +++ /dev/null @@ -1,183 +0,0 @@ -#include -#include -#include - -#define TK_Dlit 256 -#define TK_Slit 257 -#define TK_Float 258 -#define TK_Id 259 -#define TK_NameSep 260 -#define TK_Arrow 261 -#define TK_PlusPlus 262 -#define TK_MinusMinus 263 -#define TK_ArrowStar 264 -#define TK_DotStar 265 -#define TK_ShiftLeft 266 -#define TK_ShiftRight 267 -#define TK_IntegerDecimal 268 -#define TK_IntegerOctal 269 -#define TK_IntegerHex 270 -#define TK_EqualsEquals 271 -#define TK_NotEquals 272 -#define TK_AndAnd 273 -#define TK_OrOr 274 -#define TK_MultAssign 275 -#define TK_DivAssign 276 -#define TK_PercentAssign 277 -#define TK_PlusAssign 278 -#define TK_MinusAssign 279 -#define TK_AmpAssign 280 -#define TK_CaretAssign 281 -#define TK_BarAssign 282 -#define TK_DotDotDot 283 -#define TK_Whitespace 284 -#define TK_Comment 285 - -int line = 1, col = 1; - -void token( int tok, char *data, int len ) -{ - printf( "<%i> ", tok ); - for ( int i = 0; i < len; i++ ) - fputc( data[i], stdout ); - fputc( '\n', stdout ); - - /* Count newlines and columns. This code is here mainly for having some - * code in the token routine when commenting out the above output during - * performance testing. */ - for ( int i = 0; i < len; i ++ ) { - if ( data[i] == '\n' ) { - line += 1; - col = 1; - } - else { - col += 1; - } - } -} - -#define BUFSIZE 8192 -char buf[BUFSIZE]; - -void fill( int n ) -{ - printf("fill(%i)\n", n); - exit(1); -} - -int main() -{ - char *start, *p = buf, *lim = buf, *marker; - int len, have, want, shift; - int done = 0; - -#define YYCTYPE char - -#define YYCURSOR p -#define YYLIMIT lim -#define YYMARKER marker - -#define YYFILL(n) { \ - if ( ! 
done ) { \ - have = lim-start; \ - if ( start > buf ) { \ - shift = start-buf; \ - memmove( buf, start, have ); \ - start -= shift; \ - p -= shift; \ - lim -= shift; \ - marker -= shift; \ - } \ - want = BUFSIZE - have - 1; \ - len = fread( lim, 1, want, stdin ); \ - lim += len; \ - if ( len < want ) { \ - *lim++ = 0; \ - done = 1; \ - } \ - } \ - } - -again: - start = p; - -/*!re2c - -ANY = [\000-\377]; -FRACTCONST = ( [0-9]* "." [0-9]+ ) | [0-9]+ "."; -EXPONENT = [eE] [+\-]? [0-9]+; -FLOATSUFFIX = [flFL]; - - "L"? "\'" ( ANY \ [\'\\\n] | "\\" ANY )* "\'" { - token( TK_Slit, start, p-start ); - goto again; - } - - "L"? "\"" ( ANY \ [\"\\\n] | "\\" ANY )* "\"" { - token( TK_Dlit, start, p-start ); - goto again; - } - - [a-zA-Z_][a-zA-Z0-9_]* { - token( TK_Id, start, p-start ); - goto again; - } - - ( FRACTCONST EXPONENT? FLOATSUFFIX? ) | ( [0-9]+ EXPONENT FLOATSUFFIX? ) { - token( TK_Float, start, p-start ); - goto again; - } - - - ( "0" | [1-9][0-9]* ) [ulUL]* { - token( TK_IntegerDecimal, start, p-start ); - goto again; - } - - "0" [0-9]+ [ulUL]* { - token( TK_IntegerOctal, start, p-start ); - goto again; - } - - "0x" [0-9a-fA-F]+[ulUL]* { - token( TK_IntegerHex, start, p-start ); - goto again; - } - - "::" { token( TK_NameSep, start, p-start ); goto again; } - "==" { token( TK_EqualsEquals, start, p-start ); goto again; } - "!=" { token( TK_NotEquals, start, p-start ); goto again; } - "&&" { token( TK_AndAnd, start, p-start ); goto again; } - "||" { token( TK_OrOr, start, p-start ); goto again; } - "*=" { token( TK_MultAssign, start, p-start ); goto again; } - "/=" { token( TK_DivAssign, start, p-start ); goto again; } - "%=" { token( TK_PercentAssign, start, p-start ); goto again; } - "+=" { token( TK_PlusAssign, start, p-start ); goto again; } - "-=" { token( TK_MinusAssign, start, p-start ); goto again; } - "&=" { token( TK_AmpAssign, start, p-start ); goto again; } - "^=" { token( TK_CaretAssign, start, p-start ); goto again; } - "|=" { token( TK_BarAssign, 
start, p-start ); goto again; } - "++" { token( TK_PlusPlus, start, p-start ); goto again; } - "--" { token( TK_MinusMinus, start, p-start ); goto again; } - "->" { token( TK_Arrow, start, p-start ); goto again; } - "->*" { token( TK_ArrowStar, start, p-start ); goto again; } - ".*" { token( TK_DotStar, start, p-start ); goto again; } - "..." { token( TK_DotDotDot, start, p-start ); goto again; } - - "/*" { goto comment; } - "//" (ANY\"\n")* "\n" { goto again; } - [\001-\040\177]+ { goto again; } - - [\041-\057\072-\100\133-\140\173-\176] { - token( *start, start, p-start ); - goto again; - } - "\000" { return 0; } -*/ - -comment: -/*!re2c - "*/" { goto again; } - ANY { goto comment; } -*/ -} diff --git a/examples/cppscan.rl b/examples/cppscan.rl deleted file mode 100644 index 1ead5aa6..00000000 --- a/examples/cppscan.rl +++ /dev/null @@ -1,208 +0,0 @@ -/* - * A C++ scanner. Uses the longest match construction. - * << <= <<= >> >= >>= are left out since angle brackets are used in templates. - */ - -#include -#include -#include - -#define TK_Dlit 256 -#define TK_Slit 257 -#define TK_Float 258 -#define TK_Id 259 -#define TK_NameSep 260 -#define TK_Arrow 261 -#define TK_PlusPlus 262 -#define TK_MinusMinus 263 -#define TK_ArrowStar 264 -#define TK_DotStar 265 -#define TK_ShiftLeft 266 -#define TK_ShiftRight 267 -#define TK_IntegerDecimal 268 -#define TK_IntegerOctal 269 -#define TK_IntegerHex 270 -#define TK_EqualsEquals 271 -#define TK_NotEquals 272 -#define TK_AndAnd 273 -#define TK_OrOr 274 -#define TK_MultAssign 275 -#define TK_DivAssign 276 -#define TK_PercentAssign 277 -#define TK_PlusAssign 278 -#define TK_MinusAssign 279 -#define TK_AmpAssign 280 -#define TK_CaretAssign 281 -#define TK_BarAssign 282 -#define TK_DotDotDot 283 -#define TK_Whitespace 284 -#define TK_Comment 285 - -#define BUFSIZE 16384 - -/* EOF char used to flush out that last token. This should be a whitespace - * token. 
*/ - -#define LAST_CHAR 0 - -using std::cerr; -using std::cout; -using std::cin; -using std::endl; - -static char buf[BUFSIZE]; -static int line = 1, col = 1; -static char *ts, *te; -static int act, have = 0; -static int cs; - -%%{ - machine Scanner; - write data nofinal; - - # Floating literals. - fract_const = digit* '.' digit+ | digit+ '.'; - exponent = [eE] [+\-]? digit+; - float_suffix = [flFL]; - - c_comment := - any* :>> '*/' - @{ fgoto main; }; - - main := |* - - # Single and double literals. - ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) - {token( TK_Slit );}; - ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) - {token( TK_Dlit );}; - - # Identifiers - ( [a-zA-Z_] [a-zA-Z0-9_]* ) - {token( TK_Id );}; - - # Floating literals. - ( fract_const exponent? float_suffix? | digit+ exponent float_suffix? ) - {token( TK_Float );}; - - # Integer decimal. Leading part buffered by float. - ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) - {token( TK_IntegerDecimal );}; - - # Integer octal. Leading part buffered by float. - ( '0' [0-9]+ [ulUL]{0,2} ) - {token( TK_IntegerOctal );}; - - # Integer hex. Leading 0 buffered by float. - ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) ) - {token( TK_IntegerHex );}; - - # Only buffer the second item, first buffered by symbol. */ - '::' {token( TK_NameSep );}; - '==' {token( TK_EqualsEquals );}; - '!=' {token( TK_NotEquals );}; - '&&' {token( TK_AndAnd );}; - '||' {token( TK_OrOr );}; - '*=' {token( TK_MultAssign );}; - '/=' {token( TK_DivAssign );}; - '%=' {token( TK_PercentAssign );}; - '+=' {token( TK_PlusAssign );}; - '-=' {token( TK_MinusAssign );}; - '&=' {token( TK_AmpAssign );}; - '^=' {token( TK_CaretAssign );}; - '|=' {token( TK_BarAssign );}; - '++' {token( TK_PlusPlus );}; - '--' {token( TK_MinusMinus );}; - '->' {token( TK_Arrow );}; - '->*' {token( TK_ArrowStar );}; - '.*' {token( TK_DotStar );}; - - # Three char compounds, first item already buffered. */ - '...' {token( TK_DotDotDot );}; - - # Single char symbols. 
- ( punct - [_"'] ) {token( ts[0] );}; - - # Comments and whitespace. - '/*' { fgoto c_comment; }; - '//' [^\n]* '\n'; - ( any - 33..126 )+; - - *|; -}%% - -void token( int tok ) -{ - char *data = ts; - int len = te - ts; - - cout << '<' << tok << "> "; - cout.write( data, len ); - cout << '\n'; - - /* Count newlines and columns. This code is here mainly for having some - * code in the token routine when commenting out the above output during - * performance testing. */ - for ( int i = 0; i < len; i ++ ) { - if ( data[i] == '\n' ) { - line += 1; - col = 1; - } - else { - col += 1; - } - } -} - -int main() -{ - std::ios::sync_with_stdio(false); - - %% write init; - - /* Do the first read. */ - bool done = false; - while ( !done ) { - char *p = buf + have; - int space = BUFSIZE - have; - - if ( space == 0 ) { - /* We filled up the buffer trying to scan a token. */ - cerr << "OUT OF BUFFER SPACE" << endl; - exit(1); - } - - cin.read( p, space ); - int len = cin.gcount(); - char *pe = p + len; - char *eof = 0; - - /* If we see eof then append the EOF char. */ - if ( cin.eof() ) { - eof = pe; - done = true; - } - - %% write exec; - - /* Check if we failed. */ - if ( cs == Scanner_error ) { - /* Machine failed before finding a token. */ - cerr << "PARSE ERROR" << endl; - exit(1); - } - - /* Now set up the prefix. */ - if ( ts == 0 ) - have = 0; - else { - /* There is data that needs to be shifted over. */ - have = pe - ts; - memmove( buf, ts, have ); - te -= (ts-buf); - ts = buf; - } - } - - return 0; -} diff --git a/examples/format.rl b/examples/format.rl deleted file mode 100644 index f8a37beb..00000000 --- a/examples/format.rl +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Partial printf implementation. 
- */ - -#define BUFLEN 1024 -#include - -typedef void (*WriteFunc)( char *data, int len ); - -struct format -{ - char buf[BUFLEN+1]; - int buflen; - WriteFunc write; - - int flags; - int width; - int prec; - int cs; -}; - -void do_conv( struct format *fsm, char c ) -{ - printf( "flags: %x\n", fsm->flags ); - printf( "width: %i\n", fsm->width ); - printf( "prec: %i\n", fsm->prec ); - printf( "conv: %c\n", c ); - printf( "\n" ); -} - -#define FL_HASH 0x01 -#define FL_ZERO 0x02 -#define FL_DASH 0x04 -#define FL_SPACE 0x08 -#define FL_PLUS 0x10 - -#define FL_HAS_WIDTH 0x0100 -#define FL_WIDTH_ARG 0x0200 -#define FL_HAS_PREC 0x0400 -#define FL_PREC_ARG 0x0800 - -#define FL_LEN_H 0x010000 -#define FL_LEN_HH 0x020000 -#define FL_LEN_L 0x040000 -#define FL_LEN_LL 0x080000 - -%%{ - machine format; - access fsm->; - - action clear { - fsm->flags = 0; - fsm->width = 0; - fsm->prec = 0; - } - - # A non-zero number. - nznum = [1-9] [0-9]*; - - # Width - action width_num { fsm->width = 10 * fsm->width + (fc-'0'); } - action width_arg { fsm->flags |= FL_WIDTH_ARG; } - action width { fsm->flags |= FL_HAS_WIDTH; } - width = ( ( nznum $width_num | '*' @width_arg ) %width )?; - - # Precision - action prec_num { fsm->prec = 10 * fsm->prec + (fc-'0'); } - action prec_arg { fsm->flags |= FL_PREC_ARG; } - action prec { fsm->flags |= FL_HAS_PREC; } - precision = ( '.' 
( digit* $prec_num %prec | '*' @prec_arg ) )?; - - # Flags - action flags_hash { fsm->flags |= FL_HASH; } - action flags_zero { fsm->flags |= FL_ZERO; } - action flags_dash { fsm->flags |= FL_DASH; } - action flags_space { fsm->flags |= FL_SPACE; } - action flags_plus { fsm->flags |= FL_PLUS; } - - flags = ( - '#' @flags_hash | - '0' @flags_zero | - '-' @flags_dash | - ' ' @flags_space | - '+' @flags_plus )*; - - action length_h { fsm->flags |= FL_LEN_H; } - action length_l { fsm->flags |= FL_LEN_L; } - action length_hh { fsm->flags |= FL_LEN_HH; } - action length_ll { fsm->flags |= FL_LEN_LL; } - - # Must use leaving transitions on 'h' and 'l' because they are - # prefixes for 'hh' and 'll'. - length = ( - 'h' %length_h | - 'l' %length_l | - 'hh' @length_hh | - 'll' @length_ll )?; - - action conversion { - do_conv( fsm, fc ); - } - - conversion = [diouxXcsp] @conversion; - - fmt_spec = - '%' @clear - flags - width - precision - length - conversion; - - action emit { - if ( fsm->buflen == BUFLEN ) { - fsm->write( fsm->buf, fsm->buflen ); - fsm->buflen = 0; - } - fsm->buf[fsm->buflen++] = fc; - } - - action finish_ok { - if ( fsm->buflen > 0 ) - fsm->write( fsm->buf, fsm->buflen ); - } - action finish_err { - printf("EOF IN FORMAT\n"); - } - action err_char { - printf("ERROR ON CHAR: 0x%x\n", fc ); - } - - main := ( - [^%] @emit | - '%%' @emit | - fmt_spec - )* @/finish_err %/finish_ok $!err_char; -}%% - -%% write data; - -void format_init( struct format *fsm ) -{ - fsm->buflen = 0; - %% write init; -} - -void format_execute( struct format *fsm, const char *data, int len, int isEof ) -{ - const char *p = data; - const char *pe = data + len; - const char *eof = isEof ? 
pe : 0; - - %% write exec; -} - -int format_finish( struct format *fsm ) -{ - if ( fsm->cs == format_error ) - return -1; - if ( fsm->cs >= format_first_final ) - return 1; - return 0; -} - - -#define INPUT_BUFSIZE 2048 - -struct format fsm; -char buf[INPUT_BUFSIZE]; - -void write(char *data, int len ) -{ - fwrite( data, 1, len, stdout ); -} - -int main() -{ - fsm.write = write; - format_init( &fsm ); - while ( 1 ) { - int len = fread( buf, 1, INPUT_BUFSIZE, stdin ); - int eof = len != INPUT_BUFSIZE; - format_execute( &fsm, buf, len, eof ); - if ( eof ) - break; - } - if ( format_finish( &fsm ) <= 0 ) - printf("FAIL\n"); - return 0; -} - diff --git a/examples/go/.gitignore b/examples/go/.gitignore deleted file mode 100644 index f8b421d6..00000000 --- a/examples/go/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -/*.dot -/*.go -/atoi -/rpn -/url diff --git a/examples/go/Makefile b/examples/go/Makefile deleted file mode 100644 index 536afcc7..00000000 --- a/examples/go/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -ragel = ragel - -check: atoi rpn url - ./atoi - ./rpn - ./url - @echo PASS - -graph: atoi.dot rpn.dot url.dot url_authority.dot - xdot atoi.dot - xdot rpn.dot - xdot url.dot - xdot url_authority.dot - -atoi: atoi.go -atoi.go: atoi.rl -atoi.dot: atoi.rl - -rpn: rpn.go -rpn.go: rpn.rl -rpn.dot: rpn.rl - -url: url.go url_authority.go -url.go: url.rl -url.dot: url.rl -url_authority.go: url_authority.rl -url_authority.dot: url_authority.rl - -clean: ; rm -f *.go *.dot atoi rpn url -%: %.go ; go build -o $@ $^ -%.go: %.rl ; $(ragel) -Z -T0 -o $@ $< -%.dot: %.rl ; $(ragel) -V -Z -p -o $@ $< diff --git a/examples/go/README b/examples/go/README deleted file mode 100644 index bdb924b8..00000000 --- a/examples/go/README +++ /dev/null @@ -1,36 +0,0 @@ -.. 
-*-rst-*- - -Ragel Examples for Go -===================== - -These examples serve the following purposes: - -- Help you learn Ragel -- Test the correctness of the code I wrote for Ragel -- Benchmark Ragel's performance on your machine -- And hopefully give you some code you can steal ;] - -To get started you should first ``make install`` ragel. Then navigate -to this directory and run:: - - make - -To automatically compile/test/benchmark these examples. - -The following examples are provided: - -- atoi.rl: Convert string to integer (very simple) -- rpn.rl: Reverse polish notation calculator (simple) -- url.rl: Very fast and robust HTTP/SIP URL parser (very complicated) - -To see graphviz diagrams of the state machines generated by Ragel in -these examples, run the following commands:: - - sudo apt-get install xdot - make graph - -Those diagrams (along with the pdf manual) are super important for -troubleshooting and simplifying your Ragel code. - -I truly hope these examples help you in your personal and professional -endeavors. If you have any questions my email is: jtunney@gmail.com diff --git a/examples/go/atoi.rl b/examples/go/atoi.rl deleted file mode 100644 index 97c5163e..00000000 --- a/examples/go/atoi.rl +++ /dev/null @@ -1,89 +0,0 @@ -// -*-go-*- -// -// Convert a string to an integer. -// -// To compile: -// -// ragel -Z -T0 -o atoi.go atoi.rl -// go build -o atoi atoi.go -// ./atoi -// -// To show a diagram of your state machine: -// -// ragel -V -Z -p -o atoi.dot atoi.rl -// xdot atoi.dot -// - -package main - -import ( - "os" - "fmt" -) - -%%{ - machine atoi; - write data; -}%% - -func atoi(data string) (val int) { - cs, p, pe := 0, 0, len(data) - neg := false - - %%{ - action see_neg { neg = true } - action add_digit { val = val * 10 + (int(fc) - '0') } - - main := - ( '-'@see_neg | '+' )? ( digit @add_digit )+ - '\n'? 
- ; - - write init; - write exec; - }%% - - if neg { - val = -1 * val; - } - - if cs < atoi_first_final { - fmt.Println("atoi: there was an error:", cs, "<", atoi_first_final) - fmt.Println(data) - for i := 0; i < p; i++ { - fmt.Print(" ") - } - fmt.Println("^") - } - - return val -} - -////////////////////////////////////////////////////////////////////// - -type atoiTest struct { - s string - v int -} - -var atoiTests = []atoiTest{ - atoiTest{"7", 7}, - atoiTest{"666", 666}, - atoiTest{"-666", -666}, - atoiTest{"+666", 666}, - atoiTest{"1234567890", 1234567890}, - atoiTest{"+1234567890\n", 1234567890}, - // atoiTest{"+ 1234567890", 1234567890}, // i will fail -} - -func main() { - res := 0 - for _, test := range atoiTests { - res := atoi(test.s) - if res != test.v { - fmt.Fprintf(os.Stderr, "FAIL atoi(%#v) != %#v\n", test.s, test.v) - res = 1 - } - } - os.Exit(res) -} diff --git a/examples/go/rpn.rl b/examples/go/rpn.rl deleted file mode 100644 index 2ad0a2db..00000000 --- a/examples/go/rpn.rl +++ /dev/null @@ -1,159 +0,0 @@ -// -*-go-*- -// -// Reverse Polish Notation Calculator -// Copyright (c) 2010 J.A. 
Roberts Tunney -// MIT License -// -// To compile: -// -// ragel -Z -T0 -o rpn.go rpn.rl -// go build -o rpn rpn.go -// ./rpn -// -// To show a diagram of your state machine: -// -// ragel -V -Z -p -o rpn.dot rpn.rl -// xdot -Tpng -o rpn.png rpn.dot -// - -package main - -import ( - "errors" - "fmt" - "os" - "strconv" -) - -type stack struct { - items []int - count int -} - -func (s *stack) pop() int { - s.count-- - v := s.items[s.count] - return v -} - -func (s *stack) push(v int) { - s.items[s.count] = v - s.count++ -} - -func abs(v int) int { - if v < 0 { - v = -v - } - return v -} - -%% machine rpn; -%% write data; - -func rpn(data string) (res int, err error) { - // p, pe, eof := 0, len(data), len(data) - cs, p, pe := 0, 0, len(data) - mark := 0 - st := &stack{items: make([]int, 128), count: 0} - - %%{ - action mark { mark = p } - action push { x, _ := strconv.Atoi(data[mark:p]); st.push(x) } - action add { y, x := st.pop(), st.pop(); st.push(x + y) } - action sub { y, x := st.pop(), st.pop(); st.push(x - y) } - action mul { y, x := st.pop(), st.pop(); st.push(x * y) } - action div { y, x := st.pop(), st.pop(); st.push(x / y) } - action abs { st.push(abs(st.pop())) } - action abba { st.push(666) } - - stuff = digit+ >mark %push - | '+' @add - | '-' @sub - | '*' @mul - | '/' @div - | 'abs' %abs - | 'add' %add - | 'abba' %abba - ; - - main := ( space | stuff space )* ; - - write init; - write exec; - }%% - - if cs < rpn_first_final { - if p == pe { - return 0, errors.New("unexpected eof") - } else { - return 0, errors.New(fmt.Sprintf("error at position %d", p)) - } - } - - if st.count == 0 { - return 0, errors.New("rpn stack empty on result") - } - - return st.pop(), nil -} - -////////////////////////////////////////////////////////////////////// - -type rpnTest struct { - s string - v int -} - -var rpnTests = []rpnTest{ - rpnTest{"666\n", 666}, - rpnTest{"666 111\n", 111}, - rpnTest{"4 3 add\n", 7}, - rpnTest{"4 3 +\n", 7}, - rpnTest{"4 3 -\n", 1}, - rpnTest{"4 
3 *\n", 12}, - rpnTest{"6 2 /\n", 3}, - rpnTest{"0 3 -\n", -3}, - rpnTest{"0 3 - abs\n", 3}, - rpnTest{" 2 2 + 3 - \n", 1}, - rpnTest{"10 7 3 2 * - +\n", 11}, - rpnTest{"abba abba add\n", 1332}, -} - -type rpnFailTest struct { - s string - e string -} - -var rpnFailTests = []rpnFailTest{ - rpnFailTest{"\n", "rpn stack empty on result"}, -} - -func main() { - rc := 0 - - for _, test := range rpnTests { - res, err := rpn(test.s) - if err != nil { - fmt.Fprintf(os.Stderr, "FAIL rpn(%#v) %s\n", test.s, err) - rc = 1 - } else if res != test.v { - fmt.Fprintf(os.Stderr, "FAIL rpn(%#v) -> %#v != %#v\n", - test.s, res, test.v) - rc = 1 - } - } - - for _, test := range rpnFailTests { - res, err := rpn(test.s) - if err == nil { - fmt.Fprintf(os.Stderr, "FAIL rpn(%#v) -> %#v should fail: %#v\n", - test.s, res, test.e) - } else if err.Error() != test.e { - fmt.Fprintf(os.Stderr, "FAIL rpn(%#v) %#v should be %#v\n", - test.s, err.Error(), test.e) - } - } - - os.Exit(rc) -} diff --git a/examples/go/url.rl b/examples/go/url.rl deleted file mode 100644 index e94d59c6..00000000 --- a/examples/go/url.rl +++ /dev/null @@ -1,414 +0,0 @@ -// -*-go-*- -// -// URL Parser -// Copyright (c) 2010 J.A. Roberts Tunney -// MIT License -// -// To compile: -// -// ragel -Z -T0 -o url.go url.rl -// ragel -Z -T0 -o url_authority.go url_authority.rl -// go build -o url url.go url_authority.go -// ./url -// -// To show a diagram of your state machine: -// -// ragel -V -Z -p -o url.dot url.rl -// xdot url.dot -// -// ragel -V -Z -p -o url_authority.dot url_authority.rl -// xdot url_authority.dot -// -// Reference: -// -// - http://tools.ietf.org/html/rfc3986 -// - -package main - -import ( - "errors" - "fmt" - "os" - "time" -) - -type URL struct { - Scheme string // http, sip, file, etc. 
(never blank, always lowercase) - User string // who is you yo - Pass string // for like, logging in - Host string // IP 4/6 address or hostname (mandatory) - Port int // like 80 or 5060 (default 0) - Params string // stuff after ';' (NOT UNESCAPED, used in sip) - Path string // stuff starting with '/' - Query string // stuff after '?' (NOT UNESCAPED) - Fragment string // stuff after '#' -} - -%% machine url; -%% write data; - -// i parse absolute urls and don't suck at it. i'll parse just about -// any type of url you can think of and give you a human-friendly data -// structure. -// -// this routine takes no more than a few microseconds, is reentrant, -// performs in a predictable manner (for security/soft-realtime,) -// doesn't modify your `data` buffer, and under no circumstances will -// it panic (i hope!) -func URLParse(data []byte) (url *URL, err error) { - cs, p, pe, eof := 0, 0, len(data), len(data) - mark := 0 - url = new(URL) - - // this buffer is so we can unescape while we roll - var hex byte - buf := make([]byte, len(data)) - amt := 0 - - %%{ - action mark { mark = p } - action str_start { amt = 0 } - action str_char { buf[amt] = fc; amt++ } - action str_lower { buf[amt] = fc + 0x20; amt++ } - action hex_hi { hex = unhex(fc) * 16 } - action hex_lo { hex += unhex(fc) - buf[amt] = hex; amt++ } - action scheme { url.Scheme = string(buf[0:amt]) } - action authority { err = url.parseAuthority(data[mark:p]) - if err != nil { return nil, err } } - action path { url.Path = string(buf[0:amt]) } - action query { url.Query = string(data[mark:p]) } - action fragment { url.Fragment = string(buf[0:amt]) } - - # # do this instead if you *actually* use URNs (lol) - # action authority { url.Authority = string(data[mark:p]) } - - # define what a single character is allowed to be - toxic = ( cntrl | 127 ) ; - scary = ( toxic | " " | "\"" | "#" | "%" | "<" | ">" ) ; - schmchars = ( lower | digit | "+" | "-" | "." ) ; - authchars = any -- ( scary | "/" | "?" 
| "#" ) ; - pathchars = any -- ( scary | "?" | "#" ) ; - querchars = any -- ( scary | "#" ) ; - fragchars = any -- ( scary ) ; - - # define how characters trigger actions - escape = "%" xdigit xdigit ; - unescape = "%" ( xdigit @hex_hi ) ( xdigit @hex_lo ) ; - schmfirst = ( upper @str_lower ) | ( lower @str_char ) ; - schmchar = ( upper @str_lower ) | ( schmchars @str_char ) ; - authchar = escape | authchars ; - pathchar = unescape | ( pathchars @str_char ) ; - querchar = escape | querchars ; - fragchar = unescape | ( fragchars @str_char ) ; - - # define multi-character patterns - scheme = ( schmfirst schmchar* ) >str_start %scheme ; - authority = authchar+ >mark %authority ; - path = ( ( "/" @str_char ) pathchar* ) >str_start %path ; - query = "?" ( querchar* >mark %query ) ; - fragment = "#" ( fragchar* >str_start %fragment ) ; - url = scheme ":" "//"? authority path? query? fragment? - | scheme ":" "//" authority? path? query? fragment? - ; - - main := url; - write init; - write exec; - }%% - - if cs < url_first_final { - if p == pe { - return nil, errors.New( - fmt.Sprintf("unexpected eof: %s", data)) - } else { - return nil, errors.New( - fmt.Sprintf("error in url at pos %d: %s", p, data)) - } - } - - return url, nil -} - -func unhex(b byte) byte { - switch { - case '0' <= b && b <= '9': - return b - '0' - case 'a' <= b && b <= 'f': - return b - 'a' + 10 - case 'A' <= b && b <= 'F': - return b - 'A' + 10 - } - return 0 -} - -////////////////////////////////////////////////////////////////////// - -type urlTest struct { - s []byte - url URL -} - -var urlTests = []urlTest{ - - urlTest{ - []byte("http://user:pass@example.com:80;hello/lol.php?fun#omg"), - URL{ - Scheme: "http", - User: "user", - Pass: "pass", - Host: "example.com", - Port: 80, - Params: "hello", - Path: "/lol.php", - Query: "fun", - Fragment: "omg", - }, - }, - - urlTest{ - []byte("a:b"), - URL{ - Scheme: "a", - Host: "b", - }, - }, - - urlTest{ - []byte("GoPHeR://@example.com@:;/?#"), - URL{ - 
Scheme: "gopher", - Host: "@example.com@", - Path: "/", - }, - }, - - urlTest{ - []byte("ldap://[2001:db8::7]/c=GB?objectClass/?one"), - URL{ - Scheme: "ldap", - Host: "2001:db8::7", - Path: "/c=GB", - Query: "objectClass/?one", - }, - }, - - urlTest{ - []byte("http://user@example.com"), - URL{ - Scheme: "http", - User: "user", - Host: "example.com", - }, - }, - - urlTest{ - []byte("http://品研发和研发管@☃.com:65000;%20"), - URL{ - Scheme: "http", - User: "品研发和研发管", - Host: "☃.com", - Port: 65000, - Params: "%20", - }, - }, - - urlTest{ - []byte("https://example.com:80"), - URL{ - Scheme: "https", - Host: "example.com", - Port: 80, - }, - }, - - urlTest{ - []byte("file:///etc/passwd"), - URL{ - Scheme: "file", - Path: "/etc/passwd", - }, - }, - - urlTest{ - []byte("file:///c:/WINDOWS/clock.avi"), - URL{ - Scheme: "file", - Path: "/c:/WINDOWS/clock.avi", // <-- is this kosher? - }, - }, - - urlTest{ - []byte("file://hostname/path/to/the%20file.txt"), - URL{ - Scheme: "file", - Host: "hostname", - Path: "/path/to/the file.txt", - }, - }, - - urlTest{ - []byte("sip:example.com"), - URL{ - Scheme: "sip", - Host: "example.com", - }, - }, - - urlTest{ - []byte("sip:example.com:5060"), - URL{ - Scheme: "sip", - Host: "example.com", - Port: 5060, - }, - }, - - urlTest{ - []byte("mailto:ditto@pokémon.com"), - URL{ - Scheme: "mailto", - User: "ditto", - Host: "pokémon.com", - }, - }, - - urlTest{ - []byte("sip:[dead:beef::666]:5060"), - URL{ - Scheme: "sip", - Host: "dead:beef::666", - Port: 5060, - }, - }, - - urlTest{ - []byte("tel:+12126660420"), - URL{ - Scheme: "tel", - Host: "+12126660420", - }, - }, - - urlTest{ - []byte("sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg"), - URL{ - Scheme: "sip", - User: "bob barker", - Pass: "priceisright", - Host: "dead:beef::666", - Port: 5060, - Params: "isup-oli=00", - Path: "/palfun.html", - Query: "haha", - Fragment: "omg", - }, - }, - - urlTest{ - 
[]byte("http://www.google.com/search?%68l=en&safe=off&q=omfg&aq=f&aqi=g2g-s1g1g-s1g5&aql=&oq=&gs_rfai="), - URL{ - Scheme: "http", - Host: "www.google.com", - Path: "/search", - Query: "%68l=en&safe=off&q=omfg&aq=f&aqi=g2g-s1g1g-s1g5&aql=&oq=&gs_rfai=", - }, - }, - -} - -func (test *urlTest) compare(url *URL) (passed bool) { - if url.Scheme != test.url.Scheme { - fmt.Fprintf(os.Stderr, "FAIL url(%#v) scheme: %#v != %#v\n", - string(test.s), url.Scheme, test.url.Scheme) - passed = true - } - if url.User != test.url.User { - fmt.Fprintf(os.Stderr, "FAIL url(%#v) user: %#v != %#v\n", - string(test.s), url.User, test.url.User) - passed = true - } - if url.Pass != test.url.Pass { - fmt.Fprintf(os.Stderr, "FAIL url(%#v) pass: %#v != %#v\n", - string(test.s), url.Pass, test.url.Pass) - passed = true - } - if url.Host != test.url.Host { - fmt.Fprintf(os.Stderr, "FAIL url(%#v) host: %#v != %#v\n", - string(test.s), url.Host, test.url.Host) - passed = true - } - if url.Port != test.url.Port { - fmt.Fprintf(os.Stderr, "FAIL url(%#v) port: %#v != %#v\n", - string(test.s), url.Port, test.url.Port) - passed = true - } - if url.Port != test.url.Port { - fmt.Fprintf(os.Stderr, "FAIL url(%#v) port: %#v != %#v\n", - string(test.s), url.Port, test.url.Port) - passed = true - } - if url.Params != test.url.Params { - fmt.Fprintf(os.Stderr, "FAIL url(%#v) params: %#v != %#v\n", - string(test.s), url.Params, test.url.Params) - passed = true - } - if url.Path != test.url.Path { - fmt.Fprintf(os.Stderr, "FAIL url(%#v) path: %#v != %#v\n", - string(test.s), url.Path, test.url.Path) - passed = true - } - if url.Query != test.url.Query { - fmt.Fprintf(os.Stderr, "FAIL url(%#v) query: %#v != %#v\n", - string(test.s), url.Query, test.url.Query) - passed = true - } - if url.Fragment != test.url.Fragment { - fmt.Fprintf(os.Stderr, "FAIL url(%#v) fragment: %#v != %#v\n", - string(test.s), url.Fragment, test.url.Fragment) - passed = true - } - return !passed -} - -func bench() { - const rounds = 
10000 - for _, s := range [][]byte{ - []byte("a:a"), - []byte("http://google.com/"), - []byte("sip:jtunney@lobstertech.com"), - []byte("http://user:pass@example.com:80;hello/lol.php?fun#omg"), - []byte("file:///etc/passwd"), - } { - ts1 := time.Now() - for i := 0; i < rounds; i++ { - URLParse(s) - } - ts2 := time.Now() - fmt.Printf("BENCH URLParse(%s) -> %d ns\n", s, ts2.Sub(ts1).Nanoseconds() / rounds) - } -} - -func test() (rc int) { - for _, test := range urlTests { - url, err := URLParse(test.s) - if err != nil { - fmt.Fprintf(os.Stderr, "FAIL url(%#v) %s\n", string(test.s), err) - rc = 1 - continue - } - if !test.compare(url) { - rc = 1 - } - } - return rc -} - -func main() { - rc := test() - if rc == 0 { - bench() - } - os.Exit(rc) -} diff --git a/examples/go/url_authority.rl b/examples/go/url_authority.rl deleted file mode 100644 index 3e651ad0..00000000 --- a/examples/go/url_authority.rl +++ /dev/null @@ -1,165 +0,0 @@ -// -*-go-*- -// -// URL Parser -// Copyright (c) 2010 J.A. Roberts Tunney -// MIT License -// - -package main - -import ( - "errors" - "fmt" - "strconv" -) - -%% machine url_authority; -%% write data; - -// i parse strings like `alice@pokémon.com`. -// -// sounds simple right? but i also parse stuff like: -// -// bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00 -// -// which in actual reality is: -// -// - User: "bob barker" -// - Pass: "priceisright" -// - Host: "dead:beef::666" -// - Port: 5060 -// - Params: "isup-oli=00" -// -// which was probably extracted from an absolute url that looked like: -// -// sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg -// -// which was probably extracted from its address form: -// -// "Bob Barker" ;tag=666 -// -// who would have thought this could be so hard ._. 
-func (url *URL) parseAuthority(data []byte) (err error) { - cs, p, pe, eof := 0, 0, len(data), len(data) - mark := 0 - - // temporary holding place for user:pass and/or host:port cuz an - // optional term (user[:pass]) coming before a mandatory term - // (host[:pass]) would require require backtracking and all that - // evil nondeterministic stuff which ragel seems to hate. (for - // this same reason you're also allowed to use square quotes - // around the username.) - var b1, b2 string - - // this buffer is so we can unescape while we roll - var hex byte - buf := make([]byte, len(data)) - amt := 0 - - %%{ - action mark { mark = p } - action str_start { amt = 0 } - action str_char { buf[amt] = fc; amt++ } - action hex_hi { hex = unhex(fc) * 16 } - action hex_lo { hex += unhex(fc) - buf[amt] = hex; amt++ } - action copy_b1 { b1 = string(buf[0:amt]); amt = 0 } - action copy_b2 { b2 = string(buf[0:amt]); amt = 0 } - action copy_host { url.Host = string(b1); amt = 0 } - - action copy_port { - if b2 != "" { - url.Port, err = strconv.Atoi(string(b2)) - if err != nil { goto fail } - if url.Port > 65535 { goto fail } - } - } - - action params { - url.Params = string(data[mark:p]) - } - - action params_eof { - url.Params = string(data[mark:p]) - return nil - } - - action atsymbol { - url.User = string(b1) - url.Pass = string(b2) - b2 = "" - } - - action alldone { - url.Host = string(b1) - if url.Host == "" { - url.Host = string(buf[0:amt]) - } else { - if amt > 0 { - b2 = string(buf[0:amt]) - } - if b2 != "" { - url.Port, err = strconv.Atoi(string(b2)) - if err != nil { goto fail } - if url.Port > 65535 { goto fail } - } - } - return nil - } - - # define what a single character is allowed to be - toxic = ( cntrl | 127 ) ; - scary = ( toxic | space | "\"" | "#" | "%" | "<" | ">" ) ; - authdelims = ( "/" | "?" 
| "#" | ":" | "@" | ";" | "[" | "]" ) ; - userchars = any -- ( authdelims | scary ) ; - userchars_esc = userchars | ":" ; - passchars = userchars ; - hostchars = passchars | "@" ; - hostchars_esc = hostchars | ":" ; - portchars = digit ; - paramchars = hostchars | ":" | ";" ; - - # define how characters trigger actions - escape = "%" xdigit xdigit ; - unescape = "%" ( xdigit @hex_hi ) ( xdigit @hex_lo ) ; - userchar = unescape | ( userchars @str_char ) ; - userchar_esc = unescape | ( userchars_esc @str_char ) ; - passchar = unescape | ( passchars @str_char ) ; - hostchar = unescape | ( hostchars @str_char ) ; - hostchar_esc = unescape | ( hostchars_esc @str_char ) ; - portchar = unescape | ( portchars @str_char ) ; - paramchar = escape | paramchars ; - - # define multi-character patterns - user_plain = userchar+ >str_start %copy_b1 ; - user_quoted = "[" ( userchar_esc+ >str_start %copy_b1 ) "]" ; - user = ( user_quoted | user_plain ) %/alldone ; - pass = passchar+ >str_start %copy_b2 %/alldone ; - host_plain = hostchar+ >str_start %copy_b1 %copy_host ; - host_quoted = "[" ( hostchar_esc+ >str_start %copy_b1 %copy_host ) "]" ; - host = ( host_quoted | host_plain ) %/alldone ; - port = portchar* >str_start %copy_b2 %copy_port %/alldone ; - params = ";" ( paramchar* >mark %params %/params_eof ) ; - userpass = user ( ":" pass )? ; - hostport = host ( ":" port )? ; - authority = ( userpass ( "@" @atsymbol ) )? hostport params? 
; - - main := authority; - write init; - write exec; - }%% - - // if cs >= url_authority_first_final { - // return nil - // } - -fail: - // fmt.Println("error state", cs) - // fmt.Println(string(data)) - // for i := 0; i < p; i++ { - // fmt.Print(" ") - // } - // fmt.Println("^") - // fmt.Println(url) - return errors.New(fmt.Sprintf("bad url authority: %#v", string(data))) -} diff --git a/examples/gotocallret.rl b/examples/gotocallret.rl deleted file mode 100644 index 32c01a2c..00000000 --- a/examples/gotocallret.rl +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Demonstrate the use of goto, call and return. This machine expects either a - * lower case char or a digit as a command then a space followed by the command - * arg. If the command is a char, then the arg must be an a string of chars. - * If the command is a digit, then the arg must be a string of digits. This - * choice is determined by action code, rather than though transition - * desitinations. - */ - -#include -#include -#include -#include - -using namespace std; - -struct GotoCallRet -{ - char comm; - int cs, top, stack[32]; - - int init( ); - int execute( const char *data, int len, bool isEof ); - int finish( ); -}; - -%%{ - machine GotoCallRet; - - # Error machine, consumes to end of - # line, then starts the main line over. - garble_line := ( - (any-'\n')*'\n' - ) >{cout << "error: garbling line" << endl;} @{fgoto main;}; - - # Look for a string of alphas or of digits, - # on anything else, hold the character and return. - alp_comm := alpha+ $!{fhold;fret;}; - dig_comm := digit+ $!{fhold;fret;}; - - # Choose which to machine to call into based on the command. - action comm_arg { - if ( comm >= 'a' ) - fcall alp_comm; - else - fcall dig_comm; - } - - # Specifies command string. Note that the arg is left out. - command = ( - [a-z0-9] @{comm = fc;} ' ' @comm_arg '\n' - ) @{cout << "correct command" << endl;}; - - # Any number of commands. If there is an - # error anywhere, garble the line. 
- main := command* $!{fhold;fgoto garble_line;}; -}%% - -%% write data; - -int GotoCallRet::init( ) -{ - %% write init; - return 1; -} - -int GotoCallRet::execute( const char *data, int len, bool isEof ) -{ - const char *p = data; - const char *pe = data + len; - const char *eof = isEof ? pe : 0; - - %% write exec; - if ( cs == GotoCallRet_error ) - return -1; - if ( cs >= GotoCallRet_first_final ) - return 1; - return 0; -} - -#define BUFSIZE 1024 - -int main() -{ - char buf[BUFSIZE]; - - GotoCallRet gcr; - gcr.init(); - while ( fgets( buf, sizeof(buf), stdin ) != 0 ) - gcr.execute( buf, strlen(buf), false ); - - gcr.execute( 0, 0, true ); - if ( gcr.cs < GotoCallRet_first_final ) - cerr << "gotocallret: error: parsing input" << endl; - return 0; -} diff --git a/examples/mailbox.rl b/examples/mailbox.rl deleted file mode 100644 index 94590fdd..00000000 --- a/examples/mailbox.rl +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Parses unix mail boxes into headers and bodies. - */ - -#include -#include -#include -#include - -using namespace std; - -#define BUFSIZE 2048 - -/* A growable buffer for collecting headers. */ -struct Buffer -{ - Buffer() : data(0), allocated(0), length(0) { } - ~Buffer() { empty(); } - - void append( char p ) { - if ( ++length > allocated ) - upAllocate( length*2 ); - data[length-1] = p; - } - - void clear() { length = 0; } - void upAllocate( int len ); - void empty(); - - char *data; - int allocated; - int length; -}; - - -struct MailboxScanner -{ - Buffer headName; - Buffer headContent; - - int cs, top, stack[1]; - - int init( ); - int execute( const char *data, int len, bool isEof ); - int finish( ); -}; - -%%{ - machine MailboxScanner; - - # Buffer the header names. - action bufHeadName { headName.append(fc); } - - # Prints a blank line after the end of the headers of each message. - action blankLine { cout << endl; } - - # Helpers we will use in matching the date section of the from line. 
- day = /[A-Z][a-z][a-z]/; - month = /[A-Z][a-z][a-z]/; - year = /[0-9][0-9][0-9][0-9]/; - time = /[0-9][0-9]:[0-9][0-9]/ . ( /:[0-9][0-9]/ | '' ); - letterZone = /[A-Z][A-Z][A-Z]/; - numZone = /[+\-][0-9][0-9][0-9][0-9]/; - zone = letterZone | numZone; - dayNum = /[0-9 ][0-9]/; - - # These are the different formats of the date minus an obscure - # type that has a funny string 'remote from xxx' on the end. Taken - # from c-client in the imap-2000 distribution. - date = day . ' ' . month . ' ' . dayNum . ' ' . time . ' ' . - ( year | year . ' ' . zone | zone . ' ' . year ); - - # From lines separate messages. We will exclude fromLine from a message - # body line. This will cause us to stay in message line up until an - # entirely correct from line is matched. - fromLine = 'From ' . (any-'\n')* . ' ' . date . '\n'; - - # The types of characters that can be used as a header name. - hchar = print - [ :]; - - # Simply eat up an uninteresting header. Return at the first non-ws - # character following a newline. - consumeHeader := ( - [^\n] | - '\n' [ \t] | - '\n' [^ \t] @{fhold; fret;} - )*; - - action hchar {headContent.append(fc);} - action hspace {headContent.append(' ');} - - action hfinish { - headContent.append(0); - cout << headContent.data << endl; - headContent.clear(); - fhold; - fret; - } - - # Display the contents of a header as it is consumed. Collapses line - # continuations to a single space. - printHeader := ( - [^\n] @hchar | - ( '\n' ( [ \t]+ '\n' )* [ \t]+ ) %hspace - )** $!hfinish; - - action onHeader - { - headName.append(0); - if ( strcmp( headName.data, "From" ) == 0 || - strcmp( headName.data, "To" ) == 0 || - strcmp( headName.data, "Subject" ) == 0 ) - { - /* Print the header name, then jump to a machine the will display - * the contents. 
*/ - cout << headName.data << ":"; - headName.clear(); - fcall printHeader; - } - - headName.clear(); - fcall consumeHeader; - } - - header = hchar+ $bufHeadName ':' @onHeader; - - # Exclude fromLine from a messageLine, otherwise when encountering a - # fromLine we will be simultaneously matching the old message and a new - # message. - messageLine = ( [^\n]* '\n' - fromLine ); - - # An entire message. - message = ( fromLine . header* . '\n' @blankLine . messageLine* ); - - # File is a series of messages. - main := message*; -}%% - -%% write data; - -int MailboxScanner::init( ) -{ - %% write init; - return 1; -} - -int MailboxScanner::execute( const char *data, int len, bool isEof ) -{ - const char *p = data; - const char *pe = data + len; - const char *eof = isEof ? pe : 0; - - %% write exec; - - if ( cs == MailboxScanner_error ) - return -1; - if ( cs >= MailboxScanner_first_final ) - return 1; - return 0; -} - -int MailboxScanner::finish( ) -{ - if ( cs == MailboxScanner_error ) - return -1; - if ( cs >= MailboxScanner_first_final ) - return 1; - return 0; -} - - -void Buffer::empty() -{ - if ( data != 0 ) { - free( data ); - - data = 0; - length = 0; - allocated = 0; - } -} - -void Buffer::upAllocate( int len ) -{ - if ( data == 0 ) - data = (char*) malloc( len ); - else - data = (char*) realloc( data, len ); - allocated = len; -} - -MailboxScanner mailbox; -char buf[BUFSIZE]; - -int main() -{ - mailbox.init(); - while ( 1 ) { - int len = fread( buf, 1, BUFSIZE, stdin ); - mailbox.execute( buf, len, len != BUFSIZE ); - if ( len != BUFSIZE ) - break; - } - if ( mailbox.finish() <= 0 ) - cerr << "mailbox: error parsing input" << endl; - return 0; -} diff --git a/examples/params.rl b/examples/params.rl deleted file mode 100644 index a8ffeae9..00000000 --- a/examples/params.rl +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Parse command line arguments. 
- */ - -#include -#include - -#define BUFLEN 1024 - -struct params -{ - char buffer[BUFLEN+1]; - int buflen; - int cs; -}; - -%%{ - machine params; - access fsm->; - - # A buffer to collect argurments - - # Append to the buffer. - action append { - if ( fsm->buflen < BUFLEN ) - fsm->buffer[fsm->buflen++] = fc; - } - - # Terminate a buffer. - action term { - if ( fsm->buflen < BUFLEN ) - fsm->buffer[fsm->buflen++] = 0; - } - - # Clear out the buffer - action clear { fsm->buflen = 0; } - - action help { printf("help\n"); } - action version { printf("version\n"); } - action output { printf("output: \"%s\"\n", fsm->buffer); } - action spec { printf("spec: \"%s\"\n", fsm->buffer); } - action mach { printf("machine: \"%s\"\n", fsm->buffer); } - - # Helpers that collect strings - string = [^\0]+ >clear $append %term; - - # Different arguments. - help = ( '-h' | '-H' | '-?' | '--help' ) 0 @help; - version = ( '-v' | '--version' ) 0 @version; - output = '-o' 0? string 0 @output; - spec = '-S' 0? string 0 @spec; - mach = '-M' 0? 
string 0 @mach; - - main := ( - help | - version | - output | - spec | - mach - )*; -}%% - -%% write data; - -void params_init( struct params *fsm ) -{ - fsm->buflen = 0; - %% write init; -} - -void params_execute( struct params *fsm, const char *data, int len ) -{ - const char *p = data; - const char *pe = data + len; - - %% write exec; -} - -int params_finish( struct params *fsm ) -{ - if ( fsm->cs == params_error ) - return -1; - if ( fsm->cs >= params_first_final ) - return 1; - return 0; -} - -#define BUFSIZE 2048 - -int main( int argc, char **argv ) -{ - int a; - struct params params; - - params_init( ¶ms ); - for ( a = 1; a < argc; a++ ) - params_execute( ¶ms, argv[a], strlen(argv[a])+1 ); - if ( params_finish( ¶ms ) != 1 ) - fprintf( stderr, "params: error processing arguments\n" ); - - return 0; -} diff --git a/examples/pullscan.rl b/examples/pullscan.rl deleted file mode 100644 index d9e8a579..00000000 --- a/examples/pullscan.rl +++ /dev/null @@ -1,170 +0,0 @@ -#include -#include -#include - -#define BUFSIZE 4096 - -typedef struct _Scanner { - /* Scanner state. */ - int cs; - int act; - int have; - int curline; - char *ts; - char *te; - char *p; - char *pe; - char *eof; - FILE *file; - int done; - - /* Token data */ - char *data; - int len; - int value; - - char buf[BUFSIZE]; -} Scanner; - - -%%{ - machine Scanner; - write data; -}%% - -void scan_init( Scanner *s, FILE *file ) -{ - memset (s, '\0', sizeof(Scanner)); - s->curline = 1; - s->file = file; - s->eof = 0; - %% write init; -} - -#define TK_NO_TOKEN (-1) -#define TK_ERR 128 -#define TK_EOF 129 -#define TK_Identifier 130 -#define TK_Number 131 -#define TK_String 132 - -#define ret_tok( _tok ) token = _tok; s->data = s->ts - -int scan( Scanner *s ) -{ - int token = TK_NO_TOKEN; - int space, readlen; - - while ( 1 ) { - if ( s->p == s->pe ) { - printf("scanner: need more data\n"); - - if ( s->ts == 0 ) - s->have = 0; - else { - /* There is data that needs to be shifted over. 
*/ - printf("scanner: buffer broken mid token\n"); - s->have = s->pe - s->ts; - memmove( s->buf, s->ts, s->have ); - s->te -= (s->ts-s->buf); - s->ts = s->buf; - } - - s->p = s->buf + s->have; - space = BUFSIZE - s->have; - - if ( space == 0 ) { - /* We filled up the buffer trying to scan a token. */ - printf("scanner: out of buffer space\n"); - return TK_ERR; - } - - if ( s->done ) { - printf("scanner: end of file\n"); - s->p[0] = 0; - readlen = 1; - } - else { - readlen = fread( s->p, 1, space, s->file ); - if ( readlen < space ) - s->done = 1; - } - - s->pe = s->p + readlen; - } - - %%{ - machine Scanner; - access s->; - variable p s->p; - variable pe s->pe; - variable eof s->eof; - - main := |* - - # Identifiers - ( [a-zA-Z_] [a-zA-Z0-9_]* ) => - { ret_tok( TK_Identifier ); fbreak; }; - - # Whitespace - [ \t\n]; - - '"' ( [^\\"] | '\\' any ) * '"' => - { ret_tok( TK_String ); fbreak; }; - - # Number - digit+ => - { ret_tok( TK_Number ); fbreak; }; - - # EOF - 0 => - { ret_tok( TK_EOF ); fbreak; }; - - # Anything else - any => - { ret_tok( *s->p ); fbreak; }; - - *|; - - write exec; - }%% - - if ( s->cs == Scanner_error ) - return TK_ERR; - - if ( token != TK_NO_TOKEN ) { - s->len = s->p - s->data; - return token; - } - } -} - - -int main (int argc, char** argv) -{ - Scanner ss; - int tok; - - scan_init(&ss, stdin); - - while ( 1 ) { - tok = scan (&ss); - if ( tok == TK_EOF ) { - printf ("parser: EOF\n"); - break; - } - else if ( tok == TK_ERR ) { - printf ("parser: ERR\n"); - break; - } - else { - printf ("parser: %d \"", tok); - fwrite ( ss.data, 1, ss.len, stdout ); - printf ("\"\n" ); - } - } - - return 0; -} - - diff --git a/examples/rlscan.rl b/examples/rlscan.rl deleted file mode 100644 index d4d4bf97..00000000 --- a/examples/rlscan.rl +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Lexes Ragel input files. 
- */ - -#include -#include -#include -#include - -using namespace std; - -void escapeXML( char *data ) -{ - while ( *data != 0 ) { - switch ( *data ) { - case '<': cout << "&lt;"; break; - case '>': cout << "&gt;"; break; - case '&': cout << "&amp;"; break; - default: cout << *data; break; - } - data += 1; - } -} - -void escapeXML( char c ) -{ - switch ( c ) { - case '<': cout << "&lt;"; break; - case '>': cout << "&gt;"; break; - case '&': cout << "&amp;"; break; - default: cout << c; break; - } -} - -void escapeXML( char *data, int len ) -{ - for ( char *end = data + len; data != end; data++ ) { - switch ( *data ) { - case '<': cout << "&lt;"; break; - case '>': cout << "&gt;"; break; - case '&': cout << "&amp;"; break; - default: cout << *data; break; - } - } -} - -inline void write( const char *data ) -{ - cout << data; -} - -inline void write( char c ) -{ - cout << c; -} - -inline void write( char *data, int len ) -{ - cout.write( data, len ); -} - - -%%{ - machine RagelScan; - - word = [a-zA-Z_][a-zA-Z_0-9]*; - integer = [0-9]+; - hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*; - - default = ^0; - EOF = 0; - - # Handles comments in outside code and inline blocks. - c_comment := - ( default* :>> '*/' ) - ${ escapeXML( fc ); } - @{ fret; }; - - action emit { - escapeXML( ts, te-ts ); - } - - # - # Inline action code - # - - ilscan := |* - - "'" ( [^'\\] | /\\./ )* "'" => emit; - '"' ( [^"\\] | /\\./ )* '"' => emit; - '/*' { - write( "/*" ); - fcall c_comment; - }; - '//' [^\n]* '\n' => emit; - - '{' { - write( '{' ); - inline_depth += 1; - }; - - '}' { - write( '}' ); - /* If dropping down to the last } then return - * to ragel code.
*/ - if ( --inline_depth == 0 ) { - write( "\n" ); - fgoto rlscan; - } - }; - - default => { escapeXML( *ts ); }; - *|; - - # - # Ragel Tokens - # - - rlscan := |* - '}%%' { - if ( !single_line ) { - write( "\n" ); - fgoto main; - } - }; - - '\n' { - if ( single_line ) { - write( "\n" ); - fgoto main; - } - }; - - # Word - word { - write( "" ); - write( ts, te-ts ); - write( "\n" ); - }; - - # Decimal integer. - integer { - write( "" ); - write( ts, te-ts ); - write( "\n" ); - }; - - # Hexidecimal integer. - hex { - write( "" ); - write( ts, te-ts ); - write( "\n" ); - }; - - # Consume comments. - '#' [^\n]* '\n'; - - # Single literal string. - "'" ( [^'\\] | /\\./ )* "'" { - write( "" ); - escapeXML( ts, te-ts ); - write( "\n" ); - }; - - # Double literal string. - '"' ( [^"\\] | /\\./ )* '"' { - write( "" ); - escapeXML( ts, te-ts ); - write( "\n" ); - }; - - # Or literal. - '[' ( [^\]\\] | /\\./ )* ']' { - write( "" ); - escapeXML( ts, te-ts ); - write( "\n" ); - }; - - # Regex Literal. - '/' ( [^/\\] | /\\./ ) * '/' { - write( "" ); - escapeXML( ts, te-ts ); - write( "\n" ); - }; - - # Open an inline block - '{' { - inline_depth = 1; - write( "{" ); - fgoto ilscan; - }; - - punct { - write( "" ); - escapeXML( fc ); - write( "\n" ); - }; - - default; - *|; - - # - # Outside code. - # - - main := |* - - "'" ( [^'\\] | /\\./ )* "'" => emit; - '"' ( [^"\\] | /\\./ )* '"' => emit; - - '/*' { - escapeXML( ts, te-ts ); - fcall c_comment; - }; - - '//' [^\n]* '\n' => emit; - - '%%{' { - write( "
<section>\n" ); - single_line = false; - fgoto rlscan; - }; - - '%%' { - write( "<section>
\n" ); - single_line = true; - fgoto rlscan; - }; - - default { - escapeXML( *ts ); - }; - - # EOF. - EOF; - *|; -}%% - -%% write data nofinal; - -#define BUFSIZE 2048 - -int main() -{ - std::ios::sync_with_stdio(false); - - int cs, act; - char *ts, *te; - int stack[1], top; - - static char inbuf[BUFSIZE]; - bool single_line = false; - int inline_depth = 0; - - %% write init; - - bool done = false; - int have = 0; - while ( !done ) { - /* How much space is in the buffer? */ - int space = BUFSIZE - have; - if ( space == 0 ) { - /* Buffer is full. */ - cerr << "TOKEN TOO BIG" << endl; - exit(1); - } - - /* Read in a block. */ - char *p = inbuf + have; - cin.read( p, space ); - int len = cin.gcount(); - char *pe = p + len; - char *eof = 0; - - /* Check for EOF. */ - if ( len == 0 ) { - eof = pe; - done = true; - } - - %% write exec; - - if ( cs == RagelScan_error ) { - /* Machine failed before finding a token. */ - cerr << "PARSE ERROR" << endl; - exit(1); - } - - if ( ts == 0 ) - have = 0; - else { - /* There is a prefix to preserve, shift it over. */ - have = pe - ts; - memmove( inbuf, ts, have ); - te = inbuf + (te-ts); - ts = inbuf; - } - } - return 0; -} diff --git a/examples/statechart.rl b/examples/statechart.rl deleted file mode 100644 index a04471b5..00000000 --- a/examples/statechart.rl +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Demonstrate the use of labels, the epsilon operator, and the join operator - * for creating machines using the named state and transition list paradigm. - * This implementes the same machine as the atoi example. 
- */ - -#include -#include -#include -#include - -using namespace std; - -struct StateChart -{ - bool neg; - int val; - int cs; - - int init( ); - int execute( const char *data, int len ); - int finish( ); -}; - -%%{ - machine StateChart; - - action begin { - neg = false; - val = 0; - } - - action see_neg { - neg = true; - } - - action add_digit { - val = val * 10 + (fc - '0'); - } - - action finish { - if ( neg ) - val = -1 * val; - } - - atoi = ( - start: ( - '-' @see_neg ->om_num | - '+' ->om_num | - [0-9] @add_digit ->more_nums - ), - - # One or more nums. - om_num: ( - [0-9] @add_digit ->more_nums - ), - - # Zero ore more nums. - more_nums: ( - [0-9] @add_digit ->more_nums | - '' -> final - ) - ) >begin %finish; - - main := ( atoi '\n' @{ cout << val << endl; } )*; -}%% - -%% write data; - -int StateChart::init( ) -{ - neg = false; - val = false; - %% write init; - return 1; -} - -int StateChart::execute( const char *data, int len ) -{ - const char *p = data; - const char *pe = data + len; - - %% write exec; - - if ( cs == StateChart_error ) - return -1; - if ( cs >= StateChart_first_final ) - return 1; - return 0; -} - -int StateChart::finish( ) -{ - if ( cs == StateChart_error ) - return -1; - if ( cs >= StateChart_first_final ) - return 1; - return 0; -} - - -#define BUFSIZE 1024 - -int main() -{ - char buf[BUFSIZE]; - - StateChart atoi; - atoi.init(); - while ( fgets( buf, sizeof(buf), stdin ) != 0 ) { - atoi.execute( buf, strlen(buf) ); - } - if ( atoi.finish() <= 0 ) - cerr << "statechart: error: parsing input" << endl; - return 0; -} diff --git a/examples/uri.rl b/examples/uri.rl deleted file mode 100644 index 185a76c6..00000000 --- a/examples/uri.rl +++ /dev/null @@ -1,31 +0,0 @@ -%%{ - machine uri; - - action scheme {} - action loc {} - action item {} - action query {} - action last {} - action nothing {} - - main := - # Scheme machine. This is ambiguous with the item machine. We commit - # to the scheme machine on colon. 
- ( [^:/?#]+ ':' @(colon,1) @scheme )? - - # Location machine. This is ambiguous with the item machine. We remain - # ambiguous until a second slash, at that point and all points after - # we place a higher priority on staying in the location machine over - # moving into the item machine. - ( ( '/' ( '/' [^/?#]* ) $(loc,1) ) %loc %/loc )? - - # Item machine. Ambiguous with both scheme and location, which both - # get a higher priority on the characters causing ambiguity. - ( ( [^?#]+ ) $(loc,0) $(colon,0) %item %/item )? - - # Last two components, the characters that initiate these machines are - # not supported in any previous components, therefore there are no - # ambiguities introduced by these parts. - ( '?' [^#]* %query %/query)? - ( '#' any* %/last )?; -}%% -- cgit v1.2.1