diff options
author | Adrian Thurston <thurston@colm.net> | 2019-09-11 18:22:31 -0600 |
---|---|---|
committer | Adrian Thurston <thurston@colm.net> | 2019-09-11 18:22:31 -0600 |
commit | e4f23077edf61818128b355f2aab2b900702ea97 (patch) | |
tree | 05d3294062b259a3d72e277950e1364c50bbea07 /examples/rlscan.rl | |
parent | bccaa853593339c2bac8ddede25f18e1afc91597 (diff) | |
download | colm-e4f23077edf61818128b355f2aab2b900702ea97.tar.gz |
unifying some of the top-level components
including vim syntax, CREDITS, COPYING, examples, contrib.
Diffstat (limited to 'examples/rlscan.rl')
-rw-r--r-- | examples/rlscan.rl | 300 |
1 files changed, 300 insertions, 0 deletions
diff --git a/examples/rlscan.rl b/examples/rlscan.rl new file mode 100644 index 00000000..d4d4bf97 --- /dev/null +++ b/examples/rlscan.rl @@ -0,0 +1,300 @@ +/* + * Lexes Ragel input files. + */ + +#include <iostream> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +using namespace std; + +void escapeXML( char *data ) +{ + while ( *data != 0 ) { + switch ( *data ) { + case '<': cout << "<"; break; + case '>': cout << ">"; break; + case '&': cout << "&"; break; + default: cout << *data; break; + } + data += 1; + } +} + +void escapeXML( char c ) +{ + switch ( c ) { + case '<': cout << "<"; break; + case '>': cout << ">"; break; + case '&': cout << "&"; break; + default: cout << c; break; + } +} + +void escapeXML( char *data, int len ) +{ + for ( char *end = data + len; data != end; data++ ) { + switch ( *data ) { + case '<': cout << "<"; break; + case '>': cout << ">"; break; + case '&': cout << "&"; break; + default: cout << *data; break; + } + } +} + +inline void write( const char *data ) +{ + cout << data; +} + +inline void write( char c ) +{ + cout << c; +} + +inline void write( char *data, int len ) +{ + cout.write( data, len ); +} + + +%%{ + machine RagelScan; + + word = [a-zA-Z_][a-zA-Z_0-9]*; + integer = [0-9]+; + hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*; + + default = ^0; + EOF = 0; + + # Handles comments in outside code and inline blocks. + c_comment := + ( default* :>> '*/' ) + ${ escapeXML( fc ); } + @{ fret; }; + + action emit { + escapeXML( ts, te-ts ); + } + + # + # Inline action code + # + + ilscan := |* + + "'" ( [^'\\] | /\\./ )* "'" => emit; + '"' ( [^"\\] | /\\./ )* '"' => emit; + '/*' { + write( "/*" ); + fcall c_comment; + }; + '//' [^\n]* '\n' => emit; + + '{' { + write( '{' ); + inline_depth += 1; + }; + + '}' { + write( '}' ); + /* If dropping down to the last } then return + * to ragel code. */ + if ( --inline_depth == 0 ) { + write( "</inline>\n" ); + fgoto rlscan; + } + }; + + default => { escapeXML( *ts ); }; + *|; + + # + # Ragel Tokens + # + + rlscan := |* + '}%%' { + if ( !single_line ) { + write( "</section>\n" ); + fgoto main; + } + }; + + '\n' { + if ( single_line ) { + write( "</section>\n" ); + fgoto main; + } + }; + + # Word + word { + write( "<word>" ); + write( ts, te-ts ); + write( "</word>\n" ); + }; + + # Decimal integer. + integer { + write( "<int>" ); + write( ts, te-ts ); + write( "</int>\n" ); + }; + + # Hexidecimal integer. + hex { + write( "<hex>" ); + write( ts, te-ts ); + write( "</hex>\n" ); + }; + + # Consume comments. + '#' [^\n]* '\n'; + + # Single literal string. + "'" ( [^'\\] | /\\./ )* "'" { + write( "<single_lit>" ); + escapeXML( ts, te-ts ); + write( "</single_lit>\n" ); + }; + + # Double literal string. + '"' ( [^"\\] | /\\./ )* '"' { + write( "<double_lit>" ); + escapeXML( ts, te-ts ); + write( "</double_lit>\n" ); + }; + + # Or literal. + '[' ( [^\]\\] | /\\./ )* ']' { + write( "<or_lit>" ); + escapeXML( ts, te-ts ); + write( "</or_lit>\n" ); + }; + + # Regex Literal. + '/' ( [^/\\] | /\\./ ) * '/' { + write( "<re_lit>" ); + escapeXML( ts, te-ts ); + write( "</re_lit>\n" ); + }; + + # Open an inline block + '{' { + inline_depth = 1; + write( "<inline>{" ); + fgoto ilscan; + }; + + punct { + write( "<symbol>" ); + escapeXML( fc ); + write( "</symbol>\n" ); + }; + + default; + *|; + + # + # Outside code. + # + + main := |* + + "'" ( [^'\\] | /\\./ )* "'" => emit; + '"' ( [^"\\] | /\\./ )* '"' => emit; + + '/*' { + escapeXML( ts, te-ts ); + fcall c_comment; + }; + + '//' [^\n]* '\n' => emit; + + '%%{' { + write( "<section>\n" ); + single_line = false; + fgoto rlscan; + }; + + '%%' { + write( "<section>\n" ); + single_line = true; + fgoto rlscan; + }; + + default { + escapeXML( *ts ); + }; + + # EOF. + EOF; + *|; +}%% + +%% write data nofinal; + +#define BUFSIZE 2048 + +int main() +{ + std::ios::sync_with_stdio(false); + + int cs, act; + char *ts, *te; + int stack[1], top; + + static char inbuf[BUFSIZE]; + bool single_line = false; + int inline_depth = 0; + + %% write init; + + bool done = false; + int have = 0; + while ( !done ) { + /* How much space is in the buffer? */ + int space = BUFSIZE - have; + if ( space == 0 ) { + /* Buffer is full. */ + cerr << "TOKEN TOO BIG" << endl; + exit(1); + } + + /* Read in a block. */ + char *p = inbuf + have; + cin.read( p, space ); + int len = cin.gcount(); + char *pe = p + len; + char *eof = 0; + + /* Check for EOF. */ + if ( len == 0 ) { + eof = pe; + done = true; + } + + %% write exec; + + if ( cs == RagelScan_error ) { + /* Machine failed before finding a token. */ + cerr << "PARSE ERROR" << endl; + exit(1); + } + + if ( ts == 0 ) + have = 0; + else { + /* There is a prefix to preserve, shift it over. */ + have = pe - ts; + memmove( inbuf, ts, have ); + te = inbuf + (te-ts); + ts = inbuf; + } + } + return 0; +} |