summaryrefslogtreecommitdiff
path: root/test/clang4.rl
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2014-10-13 19:14:30 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2014-10-13 19:14:30 +0000
commiteafd7a3974e8605fd02794269db6114a3446e016 (patch)
tree064737b35dbe10f2995753ead92f95bac30ba048 /test/clang4.rl
downloadragel-tarball-eafd7a3974e8605fd02794269db6114a3446e016.tar.gz
ragel-6.9ragel-6.9
Diffstat (limited to 'test/clang4.rl')
-rw-r--r--test/clang4.rl188
1 files changed, 188 insertions, 0 deletions
diff --git a/test/clang4.rl b/test/clang4.rl
new file mode 100644
index 0000000..c3bb399
--- /dev/null
+++ b/test/clang4.rl
@@ -0,0 +1,188 @@
+/*
+ * @LANG: indep
+ * @NEEDS_EOF: yes
+ */
+
+char array[32];
+int pos;
+int line;
+%%
+pos = 0;
+line = 1;
+%%{
+ machine clang;
+
+ # Function to buffer a character.
+ action bufChar { array[pos] = fc; pos = pos + 1; }
+
+ # Function to clear the buffer.
+ action clearBuf { pos = 0; }
+
+ # Functions to dump tokens as they are matched.
+ action ident {
+ prints "ident(";
+ printi line;
+ prints ",";
+ printi pos;
+ prints "): ";
+ printb array;
+ prints "\n";
+ }
+ action literal {
+ prints "literal(";
+ printi line;
+ prints ",";
+ printi pos;
+ prints "): ";
+ printb array;
+ prints "\n";
+ }
+ action float {
+ prints "float(";
+ printi line;
+ prints ",";
+ printi pos;
+ prints "): ";
+ printb array;
+ prints "\n";
+ }
+ action integer {
+ prints "int(";
+ printi line;
+ prints ",";
+ printi pos;
+ prints "): ";
+ printb array;
+ prints "\n";
+ }
+ action hex {
+ prints "hex(";
+ printi line;
+ prints ",";
+ printi pos;
+ prints "): ";
+ printb array;
+ prints "\n";
+ }
+ action symbol {
+ prints "symbol(";
+ printi line;
+ prints ",";
+ printi pos;
+ prints "): ";
+ printb array;
+ prints "\n";
+ }
+
+ # Alphanumeric characters or underscore.
+ alnumu = alnum | '_';
+
+ # Alpha characters or underscore.
+ alphau = alpha | '_';
+
+ # Symbols. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving dump the symbol.
+ symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol;
+
+ # Identifier. Upon entering clear the buffer. On all transitions
+ # buffer a character. Upon leaving, dump the identifier.
+ ident = (alphau . alnumu*) >clearBuf $bufChar %ident;
+
+ # Match single characters inside literal strings. Or match
+ # an escape sequence. Buffers the character matched.
+ sliteralChar =
+ ( extend - ['\\] ) @bufChar |
+ ( '\\' . extend @bufChar );
+ dliteralChar =
+ ( extend - ["\\] ) @bufChar |
+ ( '\\' . extend @bufChar );
+
+ # Single quote and double quote literals. At the start clear
+ # the buffer. Upon leaving dump the literal.
+ sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal;
+ dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal;
+ literal = sliteral | dliteral;
+
+ # Whitespace is standard ws, newlines and control codes.
+ whitespace = any - 33 .. 126;
+
+ # Describe both c style comments and c++ style comments. The
+ # priority bump on the terminator of the comments brings us
+ # out of the extend* which matches everything.
+ ccComment = '//' . extend* $0 . '\n' @1;
+ cComment = '/!' . extend* $0 . '!/' @1;
+
+ # Match an integer. We don't bother clearing the buf or filling it.
+ # The float machine overlaps with int and it will do it.
+ integer = digit+ %integer;
+
+ # Match a float. Upon entering the machine clear the buf, buffer
+ # characters on every trans and dump the float upon leaving.
+ float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float;
+
+ # Match a hex. Upon entering the hex part, clear the buf, buffer characters
+ # on every trans and dump the hex on leaving transitions.
+ hex = '0x' . xdigit+ >clearBuf $bufChar %hex;
+
+ # Or together all the language elements.
+ fin = ( ccComment |
+ cComment |
+ symbol |
+ ident |
+ literal |
+ whitespace |
+ integer |
+ float |
+ hex );
+
+ # Star the language elements. It is critical in this type of application
+ # that we decrease the priority of out transitions before doing so. This
+ # is so that when we see 'aa' we stay in the fin machine to match an ident
+ # of length two and not wrap around to the front to match two idents of
+ # length one.
+ clang_main = ( fin $1 %0 )*;
+
+ # This machine matches everything, taking note of newlines.
+ newline = ( any | '\n' @{ line = line + 1; } )*;
+
+ # The final fsm is the lexer intersected with the newline machine which
+ # will count lines for us. Since the newline machine accepts everything,
+ # the strings accepted are governed by the clang_main machine, onto which
+ # the newline machine overlays line counting.
+ main := clang_main & newline;
+}%%
+/* _____INPUT_____
+"999 0xaAFF99 99.99 /!\n!/ 'lksdj' //\n\"\n\nliteral\n\n\n\"0x00aba foobardd.ddsf 0x0.9\n"
+"wordwithnum00asdf\n000wordfollowsnum,makes new symbol\n\nfinishing early /! unfinished ...\n"
+_____INPUT_____ */
+/* _____OUTPUT_____
+int(1,3): 999
+hex(1,6): aAFF99
+float(1,5): 99.99
+literal(2,5): lksdj
+literal(8,12):
+
+literal
+
+
+
+hex(8,5): 00aba
+ident(8,8): foobardd
+symbol(8,1): .
+ident(8,4): ddsf
+hex(8,1): 0
+symbol(8,1): .
+int(8,1): 9
+ACCEPT
+ident(1,17): wordwithnum00asdf
+int(2,3): 000
+ident(2,14): wordfollowsnum
+symbol(2,1): ,
+ident(2,5): makes
+ident(2,3): new
+ident(2,6): symbol
+ident(4,9): finishing
+ident(4,5): early
+FAIL
+_____OUTPUT_____ */
+